]>
Commit | Line | Data |
---|---|---|
5802c0cb | 1 | ;; GCC machine description for SSE instructions |
f1717362 | 2 | ;; Copyright (C) 2005-2016 Free Software Foundation, Inc. |
5802c0cb | 3 | ;; |
4 | ;; This file is part of GCC. | |
5 | ;; | |
6 | ;; GCC is free software; you can redistribute it and/or modify | |
7 | ;; it under the terms of the GNU General Public License as published by | |
038d1e19 | 8 | ;; the Free Software Foundation; either version 3, or (at your option) |
5802c0cb | 9 | ;; any later version. |
10 | ;; | |
11 | ;; GCC is distributed in the hope that it will be useful, | |
12 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | ;; GNU General Public License for more details. | |
15 | ;; | |
16 | ;; You should have received a copy of the GNU General Public License | |
038d1e19 | 17 | ;; along with GCC; see the file COPYING3. If not see |
18 | ;; <http://www.gnu.org/licenses/>. | |
5802c0cb | 19 | |
dbe83d2d | 20 | (define_c_enum "unspec" [ |
21 | ;; SSE | |
22 | UNSPEC_MOVNT | |
00820ea0 | 23 | UNSPEC_LOADU |
24 | UNSPEC_STOREU | |
dbe83d2d | 25 | |
26 | ;; SSE3 | |
27 | UNSPEC_LDDQU | |
28 | ||
29 | ;; SSSE3 | |
30 | UNSPEC_PSHUFB | |
31 | UNSPEC_PSIGN | |
32 | UNSPEC_PALIGNR | |
33 | ||
34 | ;; For SSE4A support | |
35 | UNSPEC_EXTRQI | |
36 | UNSPEC_EXTRQ | |
37 | UNSPEC_INSERTQI | |
38 | UNSPEC_INSERTQ | |
39 | ||
40 | ;; For SSE4.1 support | |
41 | UNSPEC_BLENDV | |
42 | UNSPEC_INSERTPS | |
43 | UNSPEC_DP | |
44 | UNSPEC_MOVNTDQA | |
45 | UNSPEC_MPSADBW | |
46 | UNSPEC_PHMINPOSUW | |
47 | UNSPEC_PTEST | |
48 | ||
49 | ;; For SSE4.2 support | |
50 | UNSPEC_PCMPESTR | |
51 | UNSPEC_PCMPISTR | |
52 | ||
53 | ;; For FMA4 and XOP support | |
54 | UNSPEC_FMADDSUB | |
55 | UNSPEC_XOP_UNSIGNED_CMP | |
56 | UNSPEC_XOP_TRUEFALSE | |
57 | UNSPEC_XOP_PERMUTE | |
58 | UNSPEC_FRCZ | |
59 | ||
60 | ;; For AES support | |
61 | UNSPEC_AESENC | |
62 | UNSPEC_AESENCLAST | |
63 | UNSPEC_AESDEC | |
64 | UNSPEC_AESDECLAST | |
65 | UNSPEC_AESIMC | |
66 | UNSPEC_AESKEYGENASSIST | |
67 | ||
68 | ;; For PCLMUL support | |
69 | UNSPEC_PCLMUL | |
70 | ||
71 | ;; For AVX support | |
72 | UNSPEC_PCMP | |
73 | UNSPEC_VPERMIL | |
74 | UNSPEC_VPERMIL2 | |
75 | UNSPEC_VPERMIL2F128 | |
76 | UNSPEC_CAST | |
77 | UNSPEC_VTESTP | |
78 | UNSPEC_VCVTPH2PS | |
79 | UNSPEC_VCVTPS2PH | |
80 | ||
81 | ;; For AVX2 support | |
8da8a06b | 82 | UNSPEC_VPERMVAR |
dbe83d2d | 83 | UNSPEC_VPERMTI |
84 | UNSPEC_GATHER | |
85 | UNSPEC_VSIBADDR | |
8e6b975f | 86 | |
87 | ;; For AVX512F support | |
88 | UNSPEC_VPERMI2 | |
89 | UNSPEC_VPERMT2 | |
5220cab6 | 90 | UNSPEC_VPERMI2_MASK |
697a43f8 | 91 | UNSPEC_UNSIGNED_FIX_NOTRUNC |
d2ff59d6 | 92 | UNSPEC_UNSIGNED_PCMP |
93 | UNSPEC_TESTM | |
94 | UNSPEC_TESTNM | |
8e6b975f | 95 | UNSPEC_SCATTER |
85065932 | 96 | UNSPEC_RCP14 |
97 | UNSPEC_RSQRT14 | |
98 | UNSPEC_FIXUPIMM | |
99 | UNSPEC_SCALEF | |
d2ff59d6 | 100 | UNSPEC_VTERNLOG |
85065932 | 101 | UNSPEC_GETEXP |
102 | UNSPEC_GETMANT | |
d2ff59d6 | 103 | UNSPEC_ALIGN |
104 | UNSPEC_CONFLICT | |
5220cab6 | 105 | UNSPEC_COMPRESS |
106 | UNSPEC_COMPRESS_STORE | |
107 | UNSPEC_EXPAND | |
d2ff59d6 | 108 | UNSPEC_MASKED_EQ |
109 | UNSPEC_MASKED_GT | |
110 | ||
5220cab6 | 111 | ;; For embedded rounding feature |
112 | UNSPEC_EMBEDDED_ROUNDING | |
113 | ||
d2ff59d6 | 114 | ;; For AVX512PF support |
115 | UNSPEC_GATHER_PREFETCH | |
116 | UNSPEC_SCATTER_PREFETCH | |
85065932 | 117 | |
118 | ;; For AVX512ER support | |
119 | UNSPEC_EXP2 | |
120 | UNSPEC_RCP28 | |
121 | UNSPEC_RSQRT28 | |
fc975a40 | 122 | |
123 | ;; For SHA support | |
124 | UNSPEC_SHA1MSG1 | |
125 | UNSPEC_SHA1MSG2 | |
126 | UNSPEC_SHA1NEXTE | |
127 | UNSPEC_SHA1RNDS4 | |
128 | UNSPEC_SHA256MSG1 | |
129 | UNSPEC_SHA256MSG2 | |
130 | UNSPEC_SHA256RNDS2 | |
6164575a | 131 | |
7b988cc3 | 132 | ;; For AVX512BW support |
d58134c2 | 133 | UNSPEC_DBPSADBW |
134 | UNSPEC_PMADDUBSW512 | |
2d71b728 | 135 | UNSPEC_PMADDWD512 |
7b988cc3 | 136 | UNSPEC_PSHUFHW |
137 | UNSPEC_PSHUFLW | |
54f53cd0 | 138 | UNSPEC_CVTINT2MASK |
7b988cc3 | 139 | |
6164575a | 140 | ;; For AVX512DQ support |
141 | UNSPEC_REDUCE | |
142 | UNSPEC_FPCLASS | |
143 | UNSPEC_RANGE | |
8a12b665 | 144 | |
145 | ;; For AVX512IFMA support | |
146 | UNSPEC_VPMADD52LUQ | |
147 | UNSPEC_VPMADD52HUQ | |
afee0628 | 148 | |
149 | ;; For AVX512VBMI support | |
150 | UNSPEC_VPMULTISHIFT | |
dbe83d2d | 151 | ]) |
152 | ||
153 | (define_c_enum "unspecv" [ | |
154 | UNSPECV_LDMXCSR | |
155 | UNSPECV_STMXCSR | |
156 | UNSPECV_CLFLUSH | |
157 | UNSPECV_MONITOR | |
158 | UNSPECV_MWAIT | |
159 | UNSPECV_VZEROALL | |
160 | UNSPECV_VZEROUPPER | |
161 | ]) | |
162 | ||
058e4e29 | 163 | ;; All vector modes including V?TImode, used in move patterns. |
8671b6cc | 164 | (define_mode_iterator VMOVE |
6a3f5f59 | 165 | [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI |
166 | (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI | |
167 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI | |
168 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI | |
3f4222c1 | 169 | (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI |
6a3f5f59 | 170 | (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF |
171 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) | |
5802c0cb | 172 | |
dbddc172 | 173 | ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as baseline. |
174 | (define_mode_iterator V48_AVX512VL | |
175 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") | |
176 | V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") | |
177 | V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") | |
178 | V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
179 | ||
180 | ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as baseline. | |
181 | (define_mode_iterator VI12_AVX512VL | |
182 | [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") | |
183 | V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) | |
3f4222c1 | 184 | |
afee0628 | 185 | (define_mode_iterator VI1_AVX512VL |
186 | [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) | |
187 | ||
6fe5844b | 188 | ;; All vector modes |
189 | (define_mode_iterator V | |
190 | [(V32QI "TARGET_AVX") V16QI | |
191 | (V16HI "TARGET_AVX") V8HI | |
6615b722 | 192 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI |
193 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI | |
194 | (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF | |
195 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) | |
6fe5844b | 196 | |
197 | ;; All 128bit vector modes | |
198 | (define_mode_iterator V_128 | |
199 | [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")]) | |
200 | ||
201 | ;; All 256bit vector modes | |
202 | (define_mode_iterator V_256 | |
203 | [V32QI V16HI V8SI V4DI V8SF V4DF]) | |
204 | ||
f23a3158 | 205 | ;; All 512bit vector modes |
206 | (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF]) | |
207 | ||
6615b722 | 208 | ;; All 256bit and 512bit vector modes |
209 | (define_mode_iterator V_256_512 | |
210 | [V32QI V16HI V8SI V4DI V8SF V4DF | |
211 | (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F") | |
212 | (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) | |
213 | ||
27e5502d | 214 | ;; All vector float modes |
215 | (define_mode_iterator VF | |
6a3f5f59 | 216 | [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF |
217 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) | |
218 | ||
219 | ;; 128- and 256-bit float vector modes | |
220 | (define_mode_iterator VF_128_256 | |
6fe5844b | 221 | [(V8SF "TARGET_AVX") V4SF |
222 | (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) | |
27e5502d | 223 | |
224 | ;; All SFmode vector float modes | |
225 | (define_mode_iterator VF1 | |
03ae25dc | 226 | [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF]) |
227 | ||
228 | ;; 128- and 256-bit SF vector modes | |
229 | (define_mode_iterator VF1_128_256 | |
6fe5844b | 230 | [(V8SF "TARGET_AVX") V4SF]) |
27e5502d | 231 | |
d3d65e42 | 232 | (define_mode_iterator VF1_128_256VL |
233 | [V8SF (V4SF "TARGET_AVX512VL")]) | |
234 | ||
27e5502d | 235 | ;; All DFmode vector float modes |
236 | (define_mode_iterator VF2 | |
6615b722 | 237 | [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) |
238 | ||
239 | ;; 128- and 256-bit DF vector modes | |
240 | (define_mode_iterator VF2_128_256 | |
6fe5844b | 241 | [(V4DF "TARGET_AVX") V2DF]) |
27e5502d | 242 | |
6615b722 | 243 | (define_mode_iterator VF2_512_256 |
d3d65e42 | 244 | [(V8DF "TARGET_AVX512F") V4DF]) |
245 | ||
246 | (define_mode_iterator VF2_512_256VL | |
247 | [V8DF (V4DF "TARGET_AVX512VL")]) | |
6615b722 | 248 | |
27e5502d | 249 | ;; All 128bit vector float modes |
250 | (define_mode_iterator VF_128 | |
6fe5844b | 251 | [V4SF (V2DF "TARGET_SSE2")]) |
252 | ||
253 | ;; All 256bit vector float modes | |
254 | (define_mode_iterator VF_256 | |
255 | [V8SF V4DF]) | |
27e5502d | 256 | |
6a3f5f59 | 257 | ;; All 512bit vector float modes |
258 | (define_mode_iterator VF_512 | |
259 | [V16SF V8DF]) | |
260 | ||
97173adf | 261 | (define_mode_iterator VI48_AVX512VL |
262 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") | |
263 | V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) | |
264 | ||
4f3da779 | 265 | (define_mode_iterator VF_AVX512VL |
266 | [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") | |
267 | V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
268 | ||
05ecc201 | 269 | (define_mode_iterator VF2_AVX512VL |
270 | [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
271 | ||
040236d9 | 272 | (define_mode_iterator VF1_AVX512VL |
273 | [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")]) | |
274 | ||
ba2558f8 | 275 | ;; All vector integer modes |
276 | (define_mode_iterator VI | |
03ae25dc | 277 | [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") |
94c0db54 | 278 | (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI |
279 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI | |
ba2558f8 | 280 | (V8SI "TARGET_AVX") V4SI |
281 | (V4DI "TARGET_AVX") V2DI]) | |
282 | ||
c4530783 | 283 | (define_mode_iterator VI_AVX2 |
12803fe0 | 284 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI |
285 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI | |
03ae25dc | 286 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI |
287 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) | |
c4530783 | 288 | |
e5f53f2a | 289 | ;; All QImode vector integer modes |
290 | (define_mode_iterator VI1 | |
291 | [(V32QI "TARGET_AVX") V16QI]) | |
292 | ||
97173adf | 293 | (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL |
294 | [V64QI | |
295 | V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")]) | |
296 | ||
297 | (define_mode_iterator VI_ULOADSTORE_F_AVX512VL | |
298 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") | |
299 | V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) | |
6a3f5f59 | 300 | |
e5f53f2a | 301 | ;; All 128bit vector modes, plus all 256bit vector modes when AVX is enabled (the DImode vector integer modes are in VI8 below) |
37407f90 | 302 | (define_mode_iterator V_AVX |
303 | [V16QI V8HI V4SI V2DI V4SF V2DF | |
304 | (V32QI "TARGET_AVX") (V16HI "TARGET_AVX") | |
305 | (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") | |
306 | (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")]) | |
307 | ||
e5f53f2a | 308 | (define_mode_iterator VI8 |
03ae25dc | 309 | [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) |
e5f53f2a | 310 | |
1ffb4a9e | 311 | (define_mode_iterator VI8_AVX512VL |
312 | [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) | |
313 | ||
9bb6f354 | 314 | (define_mode_iterator VI8_256_512 |
315 | [V8DI (V4DI "TARGET_AVX512VL")]) | |
316 | ||
5deb404d | 317 | (define_mode_iterator VI1_AVX2 |
318 | [(V32QI "TARGET_AVX2") V16QI]) | |
319 | ||
201f262d | 320 | (define_mode_iterator VI1_AVX512 |
321 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI]) | |
322 | ||
5deb404d | 323 | (define_mode_iterator VI2_AVX2 |
2d71b728 | 324 | [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) |
5deb404d | 325 | |
9abbf9e6 | 326 | (define_mode_iterator VI2_AVX512F |
327 | [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI]) | |
328 | ||
bf8e1ae3 | 329 | (define_mode_iterator VI4_AVX |
330 | [(V8SI "TARGET_AVX") V4SI]) | |
331 | ||
5deb404d | 332 | (define_mode_iterator VI4_AVX2 |
333 | [(V8SI "TARGET_AVX2") V4SI]) | |
334 | ||
c6cff444 | 335 | (define_mode_iterator VI4_AVX512F |
336 | [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) | |
337 | ||
7da26bee | 338 | (define_mode_iterator VI4_AVX512VL |
339 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")]) | |
340 | ||
341 | (define_mode_iterator VI48_AVX512F_AVX512VL | |
342 | [V4SI V8SI (V16SI "TARGET_AVX512F") | |
343 | (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")]) | |
344 | ||
345 | (define_mode_iterator VI2_AVX512VL | |
346 | [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI]) | |
f062acd7 | 347 | |
5f3ec3a3 | 348 | (define_mode_iterator VI8_AVX2_AVX512BW |
349 | [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI]) | |
350 | ||
5deb404d | 351 | (define_mode_iterator VI8_AVX2 |
352 | [(V4DI "TARGET_AVX2") V2DI]) | |
353 | ||
c6cff444 | 354 | (define_mode_iterator VI8_AVX2_AVX512F |
355 | [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) | |
356 | ||
da982d5c | 357 | (define_mode_iterator VI4_128_8_256 |
358 | [V4SI V4DI]) | |
359 | ||
8e9989b0 | 360 | ;; All V8D* modes |
361 | (define_mode_iterator V8FI | |
362 | [V8DF V8DI]) | |
363 | ||
364 | ;; All V16S* modes | |
365 | (define_mode_iterator V16FI | |
366 | [V16SF V16SI]) | |
367 | ||
058e4e29 | 368 | ;; ??? We should probably use TImode instead. |
5deb404d | 369 | (define_mode_iterator VIMAX_AVX2 |
fd6b07be | 370 | [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI]) |
5deb404d | 371 | |
058e4e29 | 372 | ;; ??? This should probably be dropped in favor of VIMAX_AVX2. |
5deb404d | 373 | (define_mode_iterator SSESCALARMODE |
d49df830 | 374 | [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI]) |
5deb404d | 375 | |
376 | (define_mode_iterator VI12_AVX2 | |
293fd15f | 377 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI |
378 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) | |
5deb404d | 379 | |
380 | (define_mode_iterator VI24_AVX2 | |
381 | [(V16HI "TARGET_AVX2") V8HI | |
382 | (V8SI "TARGET_AVX2") V4SI]) | |
383 | ||
8f83f53e | 384 | (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW |
385 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI | |
9abbf9e6 | 386 | (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI |
387 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) | |
388 | ||
5deb404d | 389 | (define_mode_iterator VI124_AVX2 |
390 | [(V32QI "TARGET_AVX2") V16QI | |
391 | (V16HI "TARGET_AVX2") V8HI | |
392 | (V8SI "TARGET_AVX2") V4SI]) | |
393 | ||
4055e076 | 394 | (define_mode_iterator VI2_AVX2_AVX512BW |
395 | [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) | |
396 | ||
397 | (define_mode_iterator VI48_AVX2 | |
398 | [(V8SI "TARGET_AVX2") V4SI | |
5deb404d | 399 | (V4DI "TARGET_AVX2") V2DI]) |
400 | ||
8f83f53e | 401 | (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW |
402 | [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI | |
403 | (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI | |
2257113d | 404 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) |
405 | ||
4f545baf | 406 | (define_mode_iterator VI248_AVX512BW_AVX512VL |
407 | [(V32HI "TARGET_AVX512BW") | |
408 | (V4DI "TARGET_AVX512VL") V16SI V8DI]) | |
409 | ||
410 | ;; Assumes TARGET_AVX512VL as baseline | |
411 | (define_mode_iterator VI24_AVX512BW_1 | |
412 | [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW") | |
413 | V8SI V4SI]) | |
414 | ||
db3a6e9c | 415 | (define_mode_iterator VI48_AVX512F |
416 | [(V16SI "TARGET_AVX512F") V8SI V4SI | |
417 | (V8DI "TARGET_AVX512F") V4DI V2DI]) | |
5deb404d | 418 | |
e9b578bf | 419 | (define_mode_iterator VI48_AVX_AVX512F |
420 | [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI | |
421 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) | |
422 | ||
423 | (define_mode_iterator VI12_AVX_AVX512F | |
424 | [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI | |
425 | (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI]) | |
426 | ||
5deb404d | 427 | (define_mode_iterator V48_AVX2 |
c4530783 | 428 | [V4SF V2DF |
429 | V8SF V4DF | |
5deb404d | 430 | (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") |
431 | (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) | |
432 | ||
18b7eecb | 433 | (define_mode_attr avx512 |
434 | [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw") | |
435 | (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw") | |
436 | (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f") | |
437 | (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f") | |
438 | (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f") | |
439 | (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")]) | |
440 | ||
6a3f5f59 | 441 | (define_mode_attr sse2_avx_avx512f |
442 | [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") | |
18b7eecb | 443 | (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw") |
6a3f5f59 | 444 | (V4SI "sse2") (V8SI "avx") (V16SI "avx512f") |
18b7eecb | 445 | (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f") |
6a3f5f59 | 446 | (V16SF "avx512f") (V8SF "avx") (V4SF "avx") |
447 | (V8DF "avx512f") (V4DF "avx") (V2DF "avx")]) | |
448 | ||
5deb404d | 449 | (define_mode_attr sse2_avx2 |
18b7eecb | 450 | [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw") |
451 | (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw") | |
6a3f5f59 | 452 | (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f") |
453 | (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f") | |
18b7eecb | 454 | (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")]) |
5deb404d | 455 | |
456 | (define_mode_attr ssse3_avx2 | |
18b7eecb | 457 | [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw") |
458 | (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw") | |
5deb404d | 459 | (V4SI "ssse3") (V8SI "avx2") |
460 | (V2DI "ssse3") (V4DI "avx2") | |
18b7eecb | 461 | (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")]) |
5deb404d | 462 | |
463 | (define_mode_attr sse4_1_avx2 | |
18b7eecb | 464 | [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw") |
465 | (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw") | |
6a3f5f59 | 466 | (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f") |
18b7eecb | 467 | (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")]) |
5deb404d | 468 | |
469 | (define_mode_attr avx_avx2 | |
470 | [(V4SF "avx") (V2DF "avx") | |
471 | (V8SF "avx") (V4DF "avx") | |
472 | (V4SI "avx2") (V2DI "avx2") | |
473 | (V8SI "avx2") (V4DI "avx2")]) | |
474 | ||
9fb8de40 | 475 | (define_mode_attr vec_avx2 |
476 | [(V16QI "vec") (V32QI "avx2") | |
477 | (V8HI "vec") (V16HI "avx2") | |
478 | (V4SI "vec") (V8SI "avx2") | |
479 | (V2DI "vec") (V4DI "avx2")]) | |
480 | ||
fcb19554 | 481 | (define_mode_attr avx2_avx512 |
18b7eecb | 482 | [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f") |
483 | (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f") | |
484 | (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f") | |
485 | (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f") | |
486 | (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")]) | |
487 | ||
e13e1b39 | 488 | (define_mode_attr shuffletype  ;; "f" for float element modes, "i" for integer ones |
489 | [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i") | |
490 | (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i") | |
491 | (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i") | |
492 | (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i") | |
493 | (V64QI "i") (V1TI "i") (V2TI "i")]) | |
494 | ||
8e9989b0 | 495 | (define_mode_attr ssequartermode |
496 | [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")]) | |
497 | ||
18b7eecb | 498 | (define_mode_attr ssedoublemodelower |
499 | [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi") | |
500 | (V8HI "v8si") (V16HI "v16si") (V32HI "v32si") | |
501 | (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")]) | |
502 | ||
5deb404d | 503 | (define_mode_attr ssedoublemode |
06af5c80 | 504 | [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF") |
505 | (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF") | |
506 | (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI") | |
507 | (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI") | |
508 | (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI") | |
509 | (V4DI "V8DI") (V8DI "V16DI")]) | |
5deb404d | 510 | |
511 | (define_mode_attr ssebytemode | |
18b7eecb | 512 | [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")]) |
5deb404d | 513 | |
d8f82f6b | 514 | ;; All 128bit vector integer modes |
515 | (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI]) | |
516 | ||
5deb404d | 517 | ;; All 256bit vector integer modes |
518 | (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI]) | |
519 | ||
f23a3158 | 520 | ;; All 512bit vector integer modes |
521 | (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI]) | |
522 | ||
523 | ;; Various 128bit vector integer mode combinations | |
d8f82f6b | 524 | (define_mode_iterator VI12_128 [V16QI V8HI]) |
525 | (define_mode_iterator VI14_128 [V16QI V4SI]) | |
526 | (define_mode_iterator VI124_128 [V16QI V8HI V4SI]) | |
527 | (define_mode_iterator VI24_128 [V8HI V4SI]) | |
528 | (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) | |
7d079352 | 529 | (define_mode_iterator VI48_128 [V4SI V2DI]) |
27e5502d | 530 | |
2257113d | 531 | ;; Various 256bit and 512bit vector integer mode combinations |
5dd4f649 | 532 | (define_mode_iterator VI124_256 [V32QI V16HI V8SI]) |
533 | (define_mode_iterator VI124_256_AVX512F_AVX512BW | |
534 | [V32QI V16HI V8SI | |
535 | (V64QI "TARGET_AVX512BW") | |
536 | (V32HI "TARGET_AVX512BW") | |
537 | (V16SI "TARGET_AVX512F")]) | |
7d079352 | 538 | (define_mode_iterator VI48_256 [V8SI V4DI]) |
d2ff59d6 | 539 | (define_mode_iterator VI48_512 [V16SI V8DI]) |
0daf3bbe | 540 | (define_mode_iterator VI4_256_8_512 [V8SI V8DI]) |
9c9987c5 | 541 | (define_mode_iterator VI_AVX512BW |
542 | [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) | |
5deb404d | 543 | |
6fe5844b | 544 | ;; Int-float size matches |
545 | (define_mode_iterator VI4F_128 [V4SI V4SF]) | |
546 | (define_mode_iterator VI8F_128 [V2DI V2DF]) | |
547 | (define_mode_iterator VI4F_256 [V8SI V8SF]) | |
548 | (define_mode_iterator VI8F_256 [V4DI V4DF]) | |
feadfe94 | 549 | (define_mode_iterator VI8F_256_512 |
550 | [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) | |
bf24193f | 551 | (define_mode_iterator VI48F_256_512 |
552 | [V8SI V8SF | |
553 | (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") | |
fcb19554 | 554 | (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F") |
555 | (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) | |
da2989a5 | 556 | (define_mode_iterator VF48_I1248 |
557 | [V16SI V16SF V8DI V8DF V32HI V64QI]) | |
6ce48b02 | 558 | (define_mode_iterator VI48F |
559 | [V16SI V16SF V8DI V8DF | |
560 | (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") | |
561 | (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") | |
562 | (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") | |
563 | (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
fd1fee28 | 564 | (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF]) |
6fe5844b | 565 | |
b6fc7168 | 566 | ;; Mapping from float mode to required SSE level |
567 | (define_mode_attr sse | |
568 | [(SF "sse") (DF "sse2") | |
569 | (V4SF "sse") (V2DF "sse2") | |
6a3f5f59 | 570 | (V16SF "avx512f") (V8SF "avx") |
571 | (V8DF "avx512f") (V4DF "avx")]) | |
b6fc7168 | 572 | |
573 | (define_mode_attr sse2 | |
6a3f5f59 | 574 | [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") |
575 | (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")]) | |
b6fc7168 | 576 | |
577 | (define_mode_attr sse3 | |
578 | [(V16QI "sse3") (V32QI "avx")]) | |
579 | ||
580 | (define_mode_attr sse4_1 | |
581 | [(V4SF "sse4_1") (V2DF "sse4_1") | |
6a3f5f59 | 582 | (V8SF "avx") (V4DF "avx") |
37407f90 | 583 | (V8DF "avx512f") |
584 | (V4DI "avx") (V2DI "sse4_1") | |
585 | (V8SI "avx") (V4SI "sse4_1") | |
586 | (V16QI "sse4_1") (V32QI "avx") | |
587 | (V8HI "sse4_1") (V16HI "avx")]) | |
b6fc7168 | 588 | |
63d5e521 | 589 | (define_mode_attr avxsizesuffix |
6a3f5f59 | 590 | [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512") |
591 | (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256") | |
6fe5844b | 592 | (V16QI "") (V8HI "") (V4SI "") (V2DI "") |
6a3f5f59 | 593 | (V16SF "512") (V8DF "512") |
63d5e521 | 594 | (V8SF "256") (V4DF "256") |
595 | (V4SF "") (V2DF "")]) | |
33541f98 | 596 | |
63d5e521 | 597 | ;; SSE instruction mode |
598 | (define_mode_attr sseinsnmode | |
18b7eecb | 599 | [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI") |
e13e1b39 | 600 | (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI") |
63d5e521 | 601 | (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") |
e13e1b39 | 602 | (V16SF "V16SF") (V8DF "V8DF") |
63d5e521 | 603 | (V8SF "V8SF") (V4DF "V4DF") |
5deb404d | 604 | (V4SF "V4SF") (V2DF "V2DF") |
28f914e9 | 605 | (TI "TI")]) |
63d5e521 | 606 | |
8e6b975f | 607 | ;; Mapping of vector modes to corresponding mask size |
608 | (define_mode_attr avx512fmaskmode | |
41564d2a | 609 | [(V64QI "DI") (V32QI "SI") (V16QI "HI") |
610 | (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI") | |
8e6b975f | 611 | (V16SI "HI") (V8SI "QI") (V4SI "QI") |
612 | (V8DI "QI") (V4DI "QI") (V2DI "QI") | |
613 | (V16SF "HI") (V8SF "QI") (V4SF "QI") | |
614 | (V8DF "QI") (V4DF "QI") (V2DF "QI")]) | |
615 | ||
dab48979 | 616 | ;; Mapping of vector modes to corresponding mask size, in lower case |
617 | (define_mode_attr avx512fmaskmodelower | |
618 | [(V64QI "di") (V32QI "si") (V16QI "hi") | |
619 | (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi") | |
620 | (V16SI "hi") (V8SI "qi") (V4SI "qi") | |
621 | (V8DI "qi") (V4DI "qi") (V2DI "qi") | |
622 | (V16SF "hi") (V8SF "qi") (V4SF "qi") | |
623 | (V8DF "qi") (V4DF "qi") (V2DF "qi")]) | |
624 | ||
63d5e521 | 625 | ;; Mapping of vector float modes to an integer mode of the same size |
626 | (define_mode_attr sseintvecmode | |
6a3f5f59 | 627 | [(V16SF "V16SI") (V8DF "V8DI") |
628 | (V8SF "V8SI") (V4DF "V4DI") | |
629 | (V4SF "V4SI") (V2DF "V2DI") | |
630 | (V16SI "V16SI") (V8DI "V8DI") | |
631 | (V8SI "V8SI") (V4DI "V4DI") | |
632 | (V4SI "V4SI") (V2DI "V2DI") | |
633 | (V16HI "V16HI") (V8HI "V8HI") | |
18b7eecb | 634 | (V32HI "V32HI") (V64QI "V64QI") |
c512f3a4 | 635 | (V32QI "V32QI") (V16QI "V16QI")]) |
63d5e521 | 636 | |
05ecc201 | 637 | (define_mode_attr sseintvecmode2 |
638 | [(V8DF "XI") (V4DF "OI") (V2DF "TI") | |
639 | (V8SF "OI") (V4SF "TI")]) | |
640 | ||
d6b69370 | 641 | (define_mode_attr sseintvecmodelower |
18b7eecb | 642 | [(V16SF "v16si") (V8DF "v8di") |
03ae25dc | 643 | (V8SF "v8si") (V4DF "v4di") |
d6b69370 | 644 | (V4SF "v4si") (V2DF "v2di") |
645 | (V8SI "v8si") (V4DI "v4di") | |
646 | (V4SI "v4si") (V2DI "v2di") | |
647 | (V16HI "v16hi") (V8HI "v8hi") | |
648 | (V32QI "v32qi") (V16QI "v16qi")]) | |
649 | ||
63d5e521 | 650 | ;; Mapping of vector modes to a vector mode of double size |
651 | (define_mode_attr ssedoublevecmode | |
652 | [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI") | |
653 | (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI") | |
654 | (V8SF "V16SF") (V4DF "V8DF") | |
655 | (V4SF "V8SF") (V2DF "V4DF")]) | |
656 | ||
657 | ;; Mapping of vector modes to a vector mode of half size | |
658 | (define_mode_attr ssehalfvecmode | |
6615b722 | 659 | [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") |
660 | (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI") | |
661 | (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") | |
662 | (V16SF "V8SF") (V8DF "V4DF") | |
663 | (V8SF "V4SF") (V4DF "V2DF") | |
664 | (V4SF "V2SF")]) | |
63d5e521 | 665 | |
dab25421 | 666 | ;; Mapping of vector modes to packed single mode of the same size |
667 | (define_mode_attr ssePSmode | |
6a3f5f59 | 668 | [(V16SI "V16SF") (V8DF "V16SF") |
669 | (V16SF "V16SF") (V8DI "V16SF") | |
670 | (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF") | |
671 | (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF") | |
dab25421 | 672 | (V8SI "V8SF") (V4SI "V4SF") |
673 | (V4DI "V8SF") (V2DI "V4SF") | |
18b7eecb | 674 | (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF") |
dab25421 | 675 | (V8SF "V8SF") (V4SF "V4SF") |
676 | (V4DF "V8SF") (V2DF "V4SF")]) | |
677 | ||
18b7eecb | 678 | (define_mode_attr ssePSmode2 |
679 | [(V8DI "V8SF") (V4DI "V4SF")]) | |
680 | ||
63d5e521 | 681 | ;; Mapping of vector modes back to the scalar modes |
682 | (define_mode_attr ssescalarmode | |
03ae25dc | 683 | [(V64QI "QI") (V32QI "QI") (V16QI "QI") |
684 | (V32HI "HI") (V16HI "HI") (V8HI "HI") | |
685 | (V16SI "SI") (V8SI "SI") (V4SI "SI") | |
686 | (V8DI "DI") (V4DI "DI") (V2DI "DI") | |
687 | (V16SF "SF") (V8SF "SF") (V4SF "SF") | |
688 | (V8DF "DF") (V4DF "DF") (V2DF "DF")]) | |
689 | ||
690 | ;; Mapping of vector modes to the 128bit modes | |
691 | (define_mode_attr ssexmmmode | |
692 | [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI") | |
693 | (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI") | |
694 | (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI") | |
695 | (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI") | |
696 | (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF") | |
697 | (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")]) | |
63d5e521 | 698 | |
c358a059 | 699 | ;; Pointer size override for scalar modes (Intel asm dialect) |
700 | (define_mode_attr iptr | |
701 | [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q") | |
702 | (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q") | |
703 | (V8SF "k") (V4DF "q") | |
704 | (V4SF "k") (V2DF "q") | |
705 | (SF "k") (DF "q")]) | |
706 | ||
63d5e521 | 707 | ;; Number of scalar elements in each vector type |
708 | (define_mode_attr ssescalarnum | |
03ae25dc | 709 | [(V64QI "64") (V16SI "16") (V8DI "8") |
710 | (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4") | |
63d5e521 | 711 | (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2") |
03ae25dc | 712 | (V16SF "16") (V8DF "8") |
63d5e521 | 713 | (V8SF "8") (V4DF "4") |
714 | (V4SF "4") (V2DF "2")]) | |
715 | ||
1087c60b | 716 | ;; Mask of scalar elements in each vector type |
717 | (define_mode_attr ssescalarnummask | |
718 | [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3") | |
719 | (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1") | |
720 | (V8SF "7") (V4DF "3") | |
721 | (V4SF "3") (V2DF "1")]) | |
722 | ||
5220cab6 | 723 | (define_mode_attr ssescalarsize |
724 | [(V8DI "64") (V4DI "64") (V2DI "64") | |
18b7eecb | 725 | (V64QI "8") (V32QI "8") (V16QI "8") |
5220cab6 | 726 | (V32HI "16") (V16HI "16") (V8HI "16") |
727 | (V16SI "32") (V8SI "32") (V4SI "32") | |
728 | (V16SF "32") (V8DF "64")]) | |
729 | ||
c512f3a4 | 730 | ;; SSE prefix for integer vector modes |
731 | (define_mode_attr sseintprefix | |
8e6b975f | 732 | [(V2DI "p") (V2DF "") |
733 | (V4DI "p") (V4DF "") | |
734 | (V8DI "p") (V8DF "") | |
735 | (V4SI "p") (V4SF "") | |
736 | (V8SI "p") (V8SF "") | |
18b7eecb | 737 | (V16SI "p") (V16SF "") |
738 | (V16QI "p") (V8HI "p") | |
739 | (V32QI "p") (V16HI "p") | |
740 | (V64QI "p") (V32HI "p")]) | |
c512f3a4 | 741 | |
63d5e521 | 742 | ;; SSE scalar suffix for vector modes |
743 | (define_mode_attr ssescalarmodesuffix | |
5deb404d | 744 | [(SF "ss") (DF "sd") |
745 | (V8SF "ss") (V4DF "sd") | |
63d5e521 | 746 | (V4SF "ss") (V2DF "sd") |
747 | (V8SI "ss") (V4DI "sd") | |
748 | (V4SI "d")]) | |
749 | ||
b6fc7168 | 750 | ;; Pack/unpack vector modes |
751 | (define_mode_attr sseunpackmode | |
5deb404d | 752 | [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI") |
9abbf9e6 | 753 | (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI") |
754 | (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")]) | |
b6fc7168 | 755 | |
756 | (define_mode_attr ssepackmode | |
5deb404d | 757 | [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI") |
2257113d | 758 | (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI") |
759 | (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")]) | |
b6fc7168 | 760 | |
63d5e521 | 761 | ;; Mapping of the max integer size for xop rotate immediate constraint |
762 | (define_mode_attr sserotatemax | |
763 | [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) | |
b6fc7168 | 764 | |
;; Name fragment of the cast intrinsic that corresponds to each 256-bit
;; and 512-bit mode.
(define_mode_attr castmode
  [(V8SI "si")  (V8SF "ps")  (V4DF "pd")
   (V16SI "si") (V16SF "ps") (V8DF "pd")])
675d6e0d | 769 | |
;; Mnemonic suffix selecting between sign extension and zero extension.
(define_code_attr extsuffix
  [(sign_extend "sx")
   (zero_extend "zx")])
772 | ||
;; Lane-insert/extract mnemonic suffix.
;; 256-bit modes: f128 for float vectors; for integer vectors the "%~128"
;; form yields i128 under TARGET_AVX2 and f128 otherwise.
;; 512-bit modes: f64x4 for float vectors, i64x4 for integer vectors.
(define_mode_attr i128
  [(V16SF "f64x4") (V8DF "f64x4")
   (V8SF "f128") (V4DF "f128")
   (V64QI "i64x4") (V32HI "i64x4") (V16SI "i64x4") (V8DI "i64x4")
   (V32QI "%~128") (V16HI "%~128") (V8SI "%~128") (V4DI "%~128")])
154d1782 | 779 | |
;; Mixed integer/float iterators: the SImode-element, SFmode-element and
;; DFmode-element vector modes at 256 bits and at 512 bits respectively.
(define_mode_iterator AVX256MODE2P
  [V8SI V8SF V4DF])
(define_mode_iterator AVX512MODE2P
  [V16SI V16SF V8DF])
ed30e0a6 | 783 | |
;; Mode mapping for dbpsadbw: each HImode-element vector mode maps to the
;; QImode-element vector mode of the same total size.
(define_mode_attr dbpsadbwmode
  [(V32HI "V64QI")
   (V16HI "V32QI")
   (V8HI "V16QI")])
e16e10c8 | 787 | |
;; Element-type suffix of the broadcast mnemonics, keyed by vector mode.
(define_mode_attr bcstscalarsuff
  [(V64QI "b")  (V32QI "b")  (V16QI "b")
   (V32HI "w")  (V16HI "w")  (V8HI "w")
   (V16SI "d")  (V8SI "d")   (V4SI "d")
   (V8DI "q")   (V4DI "q")   (V2DI "q")
   (V16SF "ss") (V8SF "ss")  (V4SF "ss")
   (V8DF "sd")  (V4DF "sd")  (V2DF "sd")])
8e6b975f | 796 | |
;; Operand-print modifier letter tied to the mode iterator: "t" for all
;; 256-bit modes, "g" for all 512-bit modes.
(define_mode_attr concat_tg_mode
  [(V32QI "t") (V16HI "t") (V8SI "t")
   (V4DI "t") (V8SF "t") (V4DF "t")
   (V64QI "g") (V32HI "g") (V16SI "g")
   (V8DI "g") (V16SF "g") (V8DF "g")])
801 | ||
;; Mask mode of half the width; used by the unpack patterns.
(define_mode_attr HALFMASKMODE
  [(DI "SI")
   (SI "HI")])
805 | ||
;; Mask mode of twice the width; used by the pack patterns.
(define_mode_attr DOUBLEMASKMODE
  [(HI "SI")
   (SI "DI")])
809 | ||
b6840105 | 810 | |
;; Pull in the define_subst patterns that produce the masked variants of
;; the instructions below.
(include "subst.md")
813 | ||
5802c0cb | 814 | ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. |
815 | ||
816 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
817 | ;; | |
818 | ;; Move patterns | |
819 | ;; | |
820 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
821 | ||
e5f53f2a | 822 | ;; All of these patterns are enabled for SSE1 as well as SSE2. |
823 | ;; This is essential for maintaining stable calling conventions. | |
824 | ||
;; Standard named vector move expander.  All of the work is delegated to
;; ix86_expand_vector_move; the RTL template below is never emitted.
(define_expand "mov<mode>"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
        (match_operand:VMOVE 1 "nonimmediate_operand"))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
833 | ||
;; Vector move insn; at least one of the two operands must be a register.
;;   alternative 0: constant accepted by constraint "C" into a register
;;   alternative 1: register or memory into a register
;;   alternative 2: register into memory
;; The mnemonic is chosen from the insn's "mode" attribute and from
;; whether either operand is misaligned.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
        (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  const int insn_mode = get_attr_mode (insn);

  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      {
        /* AVX512F alone has no EVEX-encoded vmov* for sizes below 64
           bytes, yet SSE registers 16-31 are EVEX-only.  Without
           AVX512VL such registers need the workarounds below.  */
        const bool dest_is_ext_reg
          = REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0]));
        const bool src_is_ext_reg
          = REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1]));

        if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
            && (dest_is_ext_reg || src_is_ext_reg))
          {
            /* Store: extract the low part of the 512-bit register.  */
            if (memory_operand (operands[0], <MODE>mode))
              switch (<MODE_SIZE>)
                {
                case 32:
                  return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
                case 16:
                  return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
                default:
                  gcc_unreachable ();
                }

            /* Load: broadcast the memory operand into the 512-bit
               register.  */
            if (memory_operand (operands[1], <MODE>mode))
              switch (<MODE_SIZE>)
                {
                case 32:
                  return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
                case 16:
                  return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
                default:
                  gcc_unreachable ();
                }

            /* Reg -> reg move is always aligned.  Just use wider move.  */
            switch (insn_mode)
              {
              case MODE_V8SF:
              case MODE_V4SF:
                return "vmovaps\t{%g1, %g0|%g0, %g1}";
              case MODE_V4DF:
              case MODE_V2DF:
                return "vmovapd\t{%g1, %g0|%g0, %g1}";
              case MODE_OI:
              case MODE_TI:
                return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
              default:
                gcc_unreachable ();
              }
          }

        /* Ordinary case.  Below 512 bits the unaligned mnemonic is only
           chosen when TARGET_AVX or TARGET_IAMCU holds.  */
        const bool any_misaligned
          = (misaligned_operand (operands[0], <MODE>mode)
             || misaligned_operand (operands[1], <MODE>mode));
        const bool use_unaligned
          = (TARGET_AVX || TARGET_IAMCU) && any_misaligned;

        switch (insn_mode)
          {
          case MODE_V16SF:
          case MODE_V8SF:
          case MODE_V4SF:
            return (use_unaligned
                    ? "%vmovups\t{%1, %0|%0, %1}"
                    : "%vmovaps\t{%1, %0|%0, %1}");

          case MODE_V8DF:
          case MODE_V4DF:
          case MODE_V2DF:
            return (use_unaligned
                    ? "%vmovupd\t{%1, %0|%0, %1}"
                    : "%vmovapd\t{%1, %0|%0, %1}");

          case MODE_OI:
          case MODE_TI:
            if (use_unaligned)
              return (TARGET_AVX512VL
                      ? "vmovdqu64\t{%1, %0|%0, %1}"
                      : "%vmovdqu\t{%1, %0|%0, %1}");
            return (TARGET_AVX512VL
                    ? "vmovdqa64\t{%1, %0|%0, %1}"
                    : "%vmovdqa\t{%1, %0|%0, %1}");

          case MODE_XI:
            return (any_misaligned
                    ? "vmovdqu64\t{%1, %0|%0, %1}"
                    : "vmovdqa64\t{%1, %0|%0, %1}");

          default:
            gcc_unreachable ();
          }
      }

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (and (eq_attr "alternative" "2")
                              (match_test "TARGET_SSE_TYPELESS_STORES"))))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
               (and (eq_attr "alternative" "0")
                    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
                 (const_string "TI")
              ]
              (const_string "<sseinsnmode>")))])
5802c0cb | 954 | |
;; Masked load/move for the V48_AVX512VL modes: a vec_merge of operand 1
;; with operand 2 under mask register operand 3.  Operand 2 is either
;; tied to the destination or a constant accepted by constraint "C".
;; The mnemonic is assembled at output time: vmov + a/u + <ssemodesuffix>
;; for float element modes, vmovdq + a/u + <ssescalarsize> otherwise.
(define_insn "<avx512>_load<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
          (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
{
  static char buf [64];

  const bool float_elts = FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode));
  const char *insn_op = float_elts ? "vmov" : "vmovdq";
  const char *sse_suffix = float_elts ? "<ssemodesuffix>" : "<ssescalarsize>";
  /* Pick the unaligned form only when the source is known misaligned.  */
  const char *align
    = misaligned_operand (operands[1], <MODE>mode) ? "u" : "a";

  snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
            insn_op, align, sse_suffix);
  return buf;
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
992 | ||
;; Masked load/move for the VI12_AVX512VL modes (requires AVX512BW).
;; Always emitted with the unaligned vmovdqu<ssescalarsize> mnemonic.
(define_insn "<avx512>_load<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
          (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
1005 | ||
;; Mask-controlled blend for the V48_AVX512VL modes: a vec_merge of
;; operand 2 (register or memory) with register operand 1 under mask
;; register operand 3.
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
          (match_operand:V48_AVX512VL 1 "register_operand" "v")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1017 | ||
;; Mask-controlled blend for the VI12_AVX512VL modes (requires AVX512BW):
;; a vec_merge of operand 2 with register operand 1 under mask register
;; operand 3.
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1029 | ||
;; Masked store for the V48_AVX512VL modes: a vec_merge of register
;; operand 1 with the previous contents of memory operand 0 under mask
;; register operand 2.
;; The mnemonic is assembled at output time: vmov + a/u + <ssemodesuffix>
;; for float element modes, vmovdq + a/u + <ssescalarsize> otherwise.
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "register_operand" "v")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  static char buf [64];

  const bool float_elts = FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode));
  const char *insn_op = float_elts ? "vmov" : "vmovdq";
  const char *sse_suffix = float_elts ? "<ssemodesuffix>" : "<ssescalarsize>";
  /* Pick the unaligned form only when the destination is known
     misaligned.  */
  const char *align
    = misaligned_operand (operands[0], <MODE>mode) ? "u" : "a";

  snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
            insn_op, align, sse_suffix);
  return buf;
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1067 | ||
;; Masked store for the VI12_AVX512VL modes (requires AVX512BW).  Always
;; emitted with the unaligned vmovdqu<ssescalarsize> mnemonic.
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1080 | ||
;; movq between vector operands: element 0 of operand 1 is kept and the
;; upper DImode element of the V2DI result is set to zero.
(define_insn "sse2_movq128"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_concat:V2DI
          (vec_select:DI
            (match_operand:V2DI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))
          (const_int 0)))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
1093 | ||
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register.  To facilitate this happy circumstance, this pattern won't
;; split until after register allocation.  If the 64-bit value didn't
;; come from memory, this is the best we can do.  This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
;;
;; Operand 0 is the V4SI destination, operand 1 the DImode source (a
;; register pair or memory) and operand 2 a V4SI scratch that is only
;; needed by the register alternative.

(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  gen_lowpart (SImode, operands[1])));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_highpart (SImode, operands[1])));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
                                             operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    /* This split only runs after reload, when no new pseudo may be
       created, so write the V2DI view of the destination directly
       instead of going through a fresh temporary.  */
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                   operands[1], const0_rtx));
  else
    gcc_unreachable ();

  /* Everything was emitted by hand; do not emit the placeholder
     (const_int 0) replacement template.  */
  DONE;
})
1133 | ||
;; After reload, rewrite a V4SF register set from a
;; zero_extended_scalar_load_operand as a vec_merge that puts the SFmode
;; subreg of the source into element 0 of an all-zero vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  /* Operand 2 is the zero vector merged into the remaining elements.  */
  operands[2] = CONST0_RTX (V4SFmode);
  /* Narrow the source to its SFmode subreg at byte offset 0.  */
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
})
1147 | ||
;; After reload, rewrite a V2DF register set from a
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode subreg
;; of the source with a DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0)
        (vec_concat:V2DF
          (match_dup 1)
          (match_dup 2)))]
{
  /* Operand 2 is the zero that fills the upper element.  */
  operands[2] = CONST0_RTX (DFmode);
  /* Narrow the source to its DFmode subreg at byte offset 0.  */
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
})
1157 | ||
;; Standard named expander for misaligned vector moves.  All of the work
;; is delegated to ix86_expand_vector_move_misalign; the RTL template
;; below is never emitted.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
        (match_operand:VMOVE 1 "nonimmediate_operand"))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
1166 | ||
;; Expander for the unaligned float vector load, optionally masked via
;; <mask_name>.
(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand")
        (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_SSE && <mask_mode512bit_condition>"
{
  /* With AVX the plain *mov<mode>_internal pattern copes with a source
     that is known to be misaligned, and a move without the UNSPEC can be
     combined with arithmetic instructions.  When the source is not known
     to be misaligned, fall through and emit the UNSPEC_LOADU insn anyway
     to honor the user's request for a misaligned load.  */
  if (TARGET_AVX
      && misaligned_operand (operands[1], <MODE>mode))
    {
      /* In the masked variant wrap the source in a vec_merge with
         operands 2 and 3; otherwise use it as is.  */
      rtx src = (<mask_applied>
                 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                      operands[2 * <mask_applied>],
                                      operands[3 * <mask_applied>])
                 : operands[1]);
      emit_insn (gen_rtx_SET (operands[0], src));
      DONE;
    }
})
1190 | ||
;; Unaligned float vector load insn (UNSPEC_LOADU), optionally masked.
;; The single-precision mnemonic is used whenever the "mode" attribute
;; selects a packed-single mode, whatever the iterator mode is.
(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_SSE && <mask_mode512bit_condition>"
{
  const int insn_mode = get_attr_mode (insn);

  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";

  return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
5802c0cb | 1222 | |
;; On TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets, merge a movsd/movhpd
;; pair (low DF load with zeroed upper half, then upper DF load) into one
;; movupd, provided ix86_operands_ok_for_move_multiple accepts the
;; operands.
(define_peephole2
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_concat:V2DF
          (match_operand:DF 1 "memory_operand")
          (match_operand:DF 4 "const0_operand")))
   (set (match_operand:V2DF 2 "register_operand")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 2)
                         (parallel [(const_int 0)]))
          (match_operand:DF 3 "memory_operand")))]
  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
  [(set (match_dup 2)
        (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
{
  /* Reuse operand 4 for the V2DF view of the low-half memory operand.  */
  operands[4] = adjust_address (operands[1], V2DFmode, 0);
})
1237 | ||
;; Unaligned float vector store insn (UNSPEC_STOREU).  The
;; single-precision mnemonic is used whenever the "mode" attribute
;; selects a packed-single mode, whatever the iterator mode is.
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_SSE"
{
  const int insn_mode = get_attr_mode (insn);

  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0|%0, %1}";

  return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
1270 | ||
;; Masked unaligned float vector store: a vec_merge of the UNSPEC_STOREU
;; of register operand 1 with the previous contents of memory operand 0
;; under mask register operand 2.
(define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
  [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
            UNSPEC_STOREU)
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  const int insn_mode = get_attr_mode (insn);

  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";

  return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1296 | ||
;; On TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets, merge a movlpd/movhpd
;; pair (store of element 0, then store of element 1) into one movupd,
;; provided ix86_operands_ok_for_move_multiple accepts the operands.
(define_peephole2
  [(set (match_operand:DF 0 "memory_operand")
        (vec_select:DF
          (match_operand:V2DF 1 "register_operand")
          (parallel [(const_int 0)])))
   (set (match_operand:DF 2 "memory_operand")
        (vec_select:DF
          (match_operand:V2DF 3 "register_operand")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
   && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
  [(set (match_dup 4)
        (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
{
  /* Operand 4 is the V2DF view of the low-half memory destination.  */
  operands[4] = adjust_address (operands[0], V2DFmode, 0);
})
1310 | ||
;; Expander for the unaligned byte-vector load, optionally masked via
;; <mask_name>.
;; With AVX the plain *mov<mode>_internal pattern copes with a source that
;; is known to be misaligned, and a move without the UNSPEC can be
;; combined with arithmetic instructions.  When the source is not known to
;; be misaligned, the UNSPEC_LOADU insn is still emitted to honor the
;; user's request for a misaligned load.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI1 0 "register_operand")
        (unspec:VI1
          [(match_operand:VI1 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
{
  if (TARGET_AVX
      && misaligned_operand (operands[1], <MODE>mode))
    {
      /* In the masked variant wrap the source in a vec_merge with
         operands 2 and 3; otherwise use it as is.  */
      rtx src = (<mask_applied>
                 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                      operands[2 * <mask_applied>],
                                      operands[3 * <mask_applied>])
                 : operands[1]);
      emit_insn (gen_rtx_SET (operands[0], src));
      DONE;
    }
})
1335 | ||
;; Unaligned integer load expander for the VI_ULOADSTORE_BW_AVX512VL
;; modes (requires AVX512BW), optionally masked via <mask_name>.  A
;; source known to be misaligned becomes a plain (possibly vec_merge'd)
;; set; otherwise the UNSPEC_LOADU template is emitted.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
        (unspec:VI_ULOADSTORE_BW_AVX512VL
          [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_AVX512BW"
{
  if (misaligned_operand (operands[1], <MODE>mode))
    {
      /* In the masked variant wrap the source in a vec_merge with
         operands 2 and 3; otherwise use it as is.  */
      rtx src = (<mask_applied>
                 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                      operands[2 * <mask_applied>],
                                      operands[3 * <mask_applied>])
                 : operands[1]);
      emit_insn (gen_rtx_SET (operands[0], src));
      DONE;
    }
})
1354 | ||
;; Unaligned integer load expander for the VI_ULOADSTORE_F_AVX512VL
;; modes (requires AVX512F), optionally masked via <mask_name>.  A source
;; known to be misaligned becomes a plain (possibly vec_merge'd) set;
;; otherwise the UNSPEC_LOADU template is emitted.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_AVX512F"
{
  if (misaligned_operand (operands[1], <MODE>mode))
    {
      /* In the masked variant wrap the source in a vec_merge with
         operands 2 and 3; otherwise use it as is.  */
      rtx src = (<mask_applied>
                 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                      operands[2 * <mask_applied>],
                                      operands[3 * <mask_applied>])
                 : operands[1]);
      emit_insn (gen_rtx_SET (operands[0], src));
      DONE;
    }
})
1373 | ||
;; Unaligned byte-vector load insn (UNSPEC_LOADU), optionally masked.
;; The vmovdqu<ssescalarsize> form, the only one that carries the mask
;; operand, is used when both AVX512VL and AVX512BW are enabled.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI1 0 "register_operand" "=v")
        (unspec:VI1
          [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
{
  const int insn_mode = get_attr_mode (insn);

  /* NOTE(review): this branch prints no <mask_operand2>; confirm that
     the packed-single modes cannot be selected for the masked variant.  */
  if (insn_mode == MODE_V8SF || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0|%0, %1}";

  if (TARGET_AVX512VL && TARGET_AVX512BW)
    return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";

  return "%vmovdqu\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set (attr "prefix_data16")
        (if_then_else
          (match_test "TARGET_AVX")
          (const_string "*")
          (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<sseinsnmode>")))])
1412 | ||
;; Unaligned integer load insn for the VI_ULOADSTORE_BW_AVX512VL modes
;; (requires AVX512BW), optionally masked via <mask_name>.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
        (unspec:VI_ULOADSTORE_BW_AVX512VL
          [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1424 | ||
;; Unaligned integer load insn for the VI_ULOADSTORE_F_AVX512VL modes
;; (requires AVX512F), optionally masked via <mask_name>.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1436 | ||
;; Unaligned byte-vector store insn (UNSPEC_STOREU).  Mnemonic choice:
;;   - vmovups when the "mode" attribute selects a packed-single mode;
;;   - vmovdqu for V32QI/V16QI unless both AVX512VL and AVX512BW are on;
;;   - vmovdqu<ssescalarsize> in every remaining case.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI1 0 "memory_operand" "=m")
        (unspec:VI1
          [(match_operand:VI1 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_SSE2"
{
  const int insn_mode = get_attr_mode (insn);

  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0|%0, %1}";

  const bool is_128_or_256_bit
    = <MODE>mode == V32QImode || <MODE>mode == V16QImode;

  if (is_128_or_256_bit && !(TARGET_AVX512VL && TARGET_AVX512BW))
    return "%vmovdqu\t{%1, %0|%0, %1}";

  return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set (attr "prefix_data16")
        (if_then_else
          (match_test "TARGET_AVX")
          (const_string "*")
          (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<sseinsnmode>")))])
ed30e0a6 | 1482 | |
;; Unaligned integer store insn for the VI_ULOADSTORE_BW_AVX512VL modes
;; (requires AVX512BW).
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
        (unspec:VI_ULOADSTORE_BW_AVX512VL
          [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1494 | ||
;; Unaligned integer store insn for the VI_ULOADSTORE_F_AVX512VL modes
;; (requires AVX512F).  The prefix attribute is "maybe_evex", the value
;; used by the other vmovdqu<ssescalarsize> load and store patterns in
;; this file.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1506 | ||
;; Masked UNSPEC_STOREU store for the VI48_AVX512VL modes.  The vec_merge
;; combines register operand 1 with the current contents of the
;; destination memory (match_dup 0) under the mask register operand 2
;; (constraint "Yk"); the mask is printed as {%2} after the destination.
;; Enabled under TARGET_AVX512F.
1507 | (define_insn "<avx512>_storedqu<mode>_mask" | |
1508 | [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m") | |
1509 | (vec_merge:VI48_AVX512VL | |
1510 | (unspec:VI48_AVX512VL | |
1511 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v")] | |
5220cab6 | 1512 | UNSPEC_STOREU) |
1513 | (match_dup 0) | |
a31e7f46 | 1514 | (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))] |
5220cab6 | 1515 | "TARGET_AVX512F" |
97173adf | 1516 | "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" |
1517 | [(set_attr "type" "ssemov") | |
1518 | (set_attr "movu" "1") | |
1519 | (set_attr "memory" "store") | |
1520 | (set_attr "prefix" "evex") | |
1521 | (set_attr "mode" "<sseinsnmode>")]) | |
1522 | ||
;; Masked UNSPEC_STOREU store for the VI12_AVX512VL modes.  Same shape as
;; a plain masked store: register operand 1 is merged with the existing
;; destination memory (match_dup 0) under mask register operand 2
;; (constraint "Yk").  Enabled under TARGET_AVX512BW.
1523 | (define_insn "<avx512>_storedqu<mode>_mask" | |
1524 | [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m") | |
1525 | (vec_merge:VI12_AVX512VL | |
1526 | (unspec:VI12_AVX512VL | |
1527 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v")] | |
1528 | UNSPEC_STOREU) | |
1529 | (match_dup 0) | |
1530 | (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))] | |
1531 | "TARGET_AVX512BW" | |
1532 | "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
5220cab6 | 1533 | [(set_attr "type" "ssemov") |
1534 | (set_attr "movu" "1") | |
1535 | (set_attr "memory" "store") | |
1536 | (set_attr "prefix" "evex") | |
1537 | (set_attr "mode" "<sseinsnmode>")]) | |
1538 | ||
;; UNSPEC_LDDQU load of a VI1-mode vector from memory (operand 1) into an
;; "x" register (operand 0), emitted as %vlddqu.  Enabled under
;; TARGET_SSE3.  The prefix_data16 and prefix_rep attributes are "*" when
;; TARGET_AVX is set; otherwise they are "0" and "1" respectively.
63d5e521 | 1539 | (define_insn "<sse3>_lddqu<avxsizesuffix>" |
e5f53f2a | 1540 | [(set (match_operand:VI1 0 "register_operand" "=x") |
1541 | (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] | |
1542 | UNSPEC_LDDQU))] | |
1543 | "TARGET_SSE3" | |
1544 | "%vlddqu\t{%1, %0|%0, %1}" | |
5802c0cb | 1545 | [(set_attr "type" "ssemov") |
fbfe006e | 1546 | (set_attr "movu" "1") |
8c1dfa94 | 1547 | (set_attr "ssememalign" "8") |
e5f53f2a | 1548 | (set (attr "prefix_data16") |
1549 | (if_then_else | |
6be3efec | 1550 | (match_test "TARGET_AVX") |
e5f53f2a | 1551 | (const_string "*") |
1552 | (const_string "0"))) | |
1553 | (set (attr "prefix_rep") | |
1554 | (if_then_else | |
6be3efec | 1555 | (match_test "TARGET_AVX") |
e5f53f2a | 1556 | (const_string "*") |
1557 | (const_string "1"))) | |
1558 | (set_attr "prefix" "maybe_vex") | |
63d5e521 | 1559 | (set_attr "mode" "<sseinsnmode>")]) |
b49a1e34 | 1560 | |
;; UNSPEC_MOVNT store of a general register (operand 1, constraint "r")
;; to memory (operand 0) for the SWI48 modes, emitted as movnti.
;; Enabled under TARGET_SSE2.
821b85a2 | 1561 | (define_insn "sse2_movnti<mode>" |
1562 | [(set (match_operand:SWI48 0 "memory_operand" "=m") | |
1563 | (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")] | |
0fd2a6af | 1564 | UNSPEC_MOVNT))] |
5802c0cb | 1565 | "TARGET_SSE2" |
1566 | "movnti\t{%1, %0|%0, %1}" | |
fbfe006e | 1567 | [(set_attr "type" "ssemov") |
00a0e418 | 1568 | (set_attr "prefix_data16" "0") |
821b85a2 | 1569 | (set_attr "mode" "<MODE>")]) |
5802c0cb | 1570 | |
;; UNSPEC_MOVNT store of a floating-point vector register (operand 1,
;; constraint "v") to memory (operand 0) for the VF modes, emitted as
;; %vmovnt<ssemodesuffix>.  Enabled under TARGET_SSE.
e5f53f2a | 1571 | (define_insn "<sse>_movnt<mode>" |
1572 | [(set (match_operand:VF 0 "memory_operand" "=m") | |
6a3f5f59 | 1573 | (unspec:VF |
1574 | [(match_operand:VF 1 "register_operand" "v")] | |
1575 | UNSPEC_MOVNT))] | |
e5f53f2a | 1576 | "TARGET_SSE" |
1577 | "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}" | |
1578 | [(set_attr "type" "ssemov") | |
1579 | (set_attr "prefix" "maybe_vex") | |
1580 | (set_attr "mode" "<MODE>")]) | |
1581 | ||
;; UNSPEC_MOVNT store of an integer vector register (operand 1,
;; constraint "v") to memory (operand 0) for the VI8 modes, emitted as
;; %vmovntdq.  Enabled under TARGET_SSE2.  prefix_data16 is "*" when
;; TARGET_AVX is set and "1" otherwise.
;; NOTE(review): "type" is "ssecvt" here, whereas the movnti and VF movnt
;; patterns use "ssemov" -- confirm this is intentional.
1582 | (define_insn "<sse2>_movnt<mode>" | |
1583 | [(set (match_operand:VI8 0 "memory_operand" "=m") | |
4c1099de | 1584 | (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")] |
e5f53f2a | 1585 | UNSPEC_MOVNT))] |
1586 | "TARGET_SSE2" | |
1587 | "%vmovntdq\t{%1, %0|%0, %1}" | |
ed30e0a6 | 1588 | [(set_attr "type" "ssecvt") |
e5f53f2a | 1589 | (set (attr "prefix_data16") |
1590 | (if_then_else | |
6be3efec | 1591 | (match_test "TARGET_AVX") |
e5f53f2a | 1592 | (const_string "*") |
1593 | (const_string "1"))) | |
1594 | (set_attr "prefix" "maybe_vex") | |
63d5e521 | 1595 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 1596 | |
5b5037b3 | 1597 | ; Expand patterns for non-temporal stores. At the moment, only those |
1598 | ; that directly map to insns are defined; it would be possible to | |
1599 | ; define patterns for other modes that would expand to several insns. | |
1600 | ||
;; Each entry pairs a mode with the target condition under which it is
;; available: DI needs TARGET_SSE2 && TARGET_64BIT, SI needs TARGET_SSE2,
;; SF/DF need TARGET_SSE4A, 512-bit vectors need TARGET_AVX512F, 256-bit
;; vectors need TARGET_AVX, V2DI/V2DF need TARGET_SSE2.  V4SF carries no
;; extra condition of its own.
6fe5844b | 1601 | ;; Modes handled by storent patterns. |
1602 | (define_mode_iterator STORENT_MODE | |
0fd2a6af | 1603 | [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2") |
1604 | (SF "TARGET_SSE4A") (DF "TARGET_SSE4A") | |
6a3f5f59 | 1605 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") |
1606 | (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF | |
1607 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) | |
6fe5844b | 1608 | |
;; Expander for "storent<mode>" over the STORENT_MODE modes.  It has no
;; preparation statements, so it simply emits the template: memory
;; operand 0 is set to an UNSPEC_MOVNT of register operand 1.  Enabled
;; under TARGET_SSE in addition to the per-mode iterator conditions.
2a466fea | 1609 | (define_expand "storent<mode>" |
abd4f58b | 1610 | [(set (match_operand:STORENT_MODE 0 "memory_operand") |
8cedf886 | 1611 | (unspec:STORENT_MODE |
abd4f58b | 1612 | [(match_operand:STORENT_MODE 1 "register_operand")] |
6fe5844b | 1613 | UNSPEC_MOVNT))] |
1614 | "TARGET_SSE") | |
5b5037b3 | 1615 | |
5802c0cb | 1616 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1617 | ;; | |
2a466fea | 1618 | ;; Parallel floating point arithmetic |
5802c0cb | 1619 | ;; |
1620 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
1621 | ||
;; Expander for the absneg codes on VF vectors.  The whole expansion is
;; delegated to ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode,
;; operands); DONE follows, so the template itself is never emitted.
;; Enabled under TARGET_SSE.
f9162d6c | 1622 | (define_expand "<code><mode>2" |
abd4f58b | 1623 | [(set (match_operand:VF 0 "register_operand") |
27e5502d | 1624 | (absneg:VF |
abd4f58b | 1625 | (match_operand:VF 1 "register_operand")))] |
6fe5844b | 1626 | "TARGET_SSE" |
f9162d6c | 1627 | "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;") |
5802c0cb | 1628 | |
;; Abs/neg on VF vectors, kept as a single insn ("#") until after reload
;; and then split into one logical operation on operand 0:
;;   - NEG becomes XOR of the two sources, anything else becomes AND.
;;   - Operand 2 is carried in a (use ...); presumably it is the sign-bit
;;     mask supplied by the expander -- TODO confirm against
;;     ix86_expand_fp_absneg_operator.
;; Source selection in the split:
;;   - With TARGET_AVX, operands 1 and 2 are used directly, swapped when
;;     operand 1 is a MEM so that the memory operand comes second.
;;   - Without AVX, the first source is operand 0 and the second is
;;     operand 2 if operand 1 equals operand 0, else operand 1.
;; The four alternatives are tagged noavx,noavx,avx,avx via "isa".
27e5502d | 1629 | (define_insn_and_split "*absneg<mode>2" |
6a3f5f59 | 1630 | [(set (match_operand:VF 0 "register_operand" "=x,x,v,v") |
27e5502d | 1631 | (match_operator:VF 3 "absneg_operator" |
6a3f5f59 | 1632 | [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")])) |
1633 | (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))] | |
6fe5844b | 1634 | "TARGET_SSE" |
1c098c41 | 1635 | "#" |
a3d5479a | 1636 | "&& reload_completed" |
1c098c41 | 1637 | [(const_int 0)] |
1638 | { | |
27e5502d | 1639 | enum rtx_code absneg_op; |
1640 | rtx op1, op2; | |
d8927ee1 | 1641 | rtx t; |
1642 | ||
27e5502d | 1643 | if (TARGET_AVX) |
1644 | { | |
1645 | if (MEM_P (operands[1])) | |
1646 | op1 = operands[2], op2 = operands[1]; | |
1647 | else | |
1648 | op1 = operands[1], op2 = operands[2]; | |
1649 | } | |
d8927ee1 | 1650 | else |
27e5502d | 1651 | { |
1652 | op1 = operands[0]; | |
1653 | if (rtx_equal_p (operands[0], operands[1])) | |
1654 | op2 = operands[2]; | |
1655 | else | |
1656 | op2 = operands[1]; | |
1657 | } | |
d8927ee1 | 1658 | |
27e5502d | 1659 | absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND; |
1660 | t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2); | |
d1f9b275 | 1661 | t = gen_rtx_SET (operands[0], t); |
d8927ee1 | 1662 | emit_insn (t); |
1c098c41 | 1663 | DONE; |
27e5502d | 1664 | } |
1665 | [(set_attr "isa" "noavx,noavx,avx,avx")]) | |
ed30e0a6 | 1666 | |
;; Expander for vector plus/minus on VF modes, with the mask and round
;; variants.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy on the operands; there is no DONE,
;; so the plusminus template is then emitted.  Enabled under TARGET_SSE
;; together with the mask/round 512-bit conditions.
be60ab96 | 1667 | (define_expand "<plusminus_insn><mode>3<mask_name><round_name>" |
abd4f58b | 1668 | [(set (match_operand:VF 0 "register_operand") |
27e5502d | 1669 | (plusminus:VF |
be60ab96 | 1670 | (match_operand:VF 1 "<round_nimm_predicate>") |
1671 | (match_operand:VF 2 "<round_nimm_predicate>")))] | |
1672 | "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
9409fce7 | 1673 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
5802c0cb | 1674 | |
;; Vector plus/minus on VF modes.  Alternative 0 (noavx) is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; (avx) is the three-operand "v"-prefixed form carrying the mask and
;; rounding substitutions.  The insn condition additionally requires
;; ix86_binary_operator_ok for the operands.
be60ab96 | 1675 | (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>" |
e13e1b39 | 1676 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
27e5502d | 1677 | (plusminus:VF |
be60ab96 | 1678 | (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v") |
1679 | (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))] | |
1680 | "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
27e5502d | 1681 | "@ |
1682 | <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} | |
be60ab96 | 1683 | v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" |
27e5502d | 1684 | [(set_attr "isa" "noavx,avx") |
1685 | (set_attr "type" "sseadd") | |
5220cab6 | 1686 | (set_attr "prefix" "<mask_prefix3>") |
2a466fea | 1687 | (set_attr "mode" "<MODE>")]) |
5802c0cb | 1688 | |
;; Scalar-in-vector plus/minus on VF_128 modes.  The plus/minus of
;; operands 1 and 2 is vec_merged with operand 1 (match_dup 1) under mask
;; (const_int 1), i.e. only the lowest element takes the arithmetic
;; result and the remaining elements come from operand 1.  Alternative 0
;; is the noavx two-operand form, alternative 1 the avx three-operand
;; form with the rounding substitution.  Enabled under TARGET_SSE.
0b7cc9c6 | 1689 | (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>" |
e13e1b39 | 1690 | [(set (match_operand:VF_128 0 "register_operand" "=x,v") |
27e5502d | 1691 | (vec_merge:VF_128 |
1692 | (plusminus:VF_128 | |
e13e1b39 | 1693 | (match_operand:VF_128 1 "register_operand" "0,v") |
0b7cc9c6 | 1694 | (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>")) |
5802c0cb | 1695 | (match_dup 1) |
1696 | (const_int 1)))] | |
6fe5844b | 1697 | "TARGET_SSE" |
27e5502d | 1698 | "@ |
c358a059 | 1699 | <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} |
0b7cc9c6 | 1700 | v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}" |
27e5502d | 1701 | [(set_attr "isa" "noavx,avx") |
1702 | (set_attr "type" "sseadd") | |
0b7cc9c6 | 1703 | (set_attr "prefix" "<round_prefix>") |
2a466fea | 1704 | (set_attr "mode" "<ssescalarmode>")]) |
5802c0cb | 1705 | |
;; Expander for vector multiply on VF modes, with the mask and round
;; variants.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy (MULT, ...); there is no DONE, so
;; the mult template is then emitted.  Enabled under TARGET_SSE together
;; with the mask/round 512-bit conditions.
be60ab96 | 1706 | (define_expand "mul<mode>3<mask_name><round_name>" |
abd4f58b | 1707 | [(set (match_operand:VF 0 "register_operand") |
27e5502d | 1708 | (mult:VF |
be60ab96 | 1709 | (match_operand:VF 1 "<round_nimm_predicate>") |
1710 | (match_operand:VF 2 "<round_nimm_predicate>")))] | |
1711 | "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
2a466fea | 1712 | "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") |
1713 | ||
;; Vector multiply on VF modes.  Operand 1 is marked commutative ("%").
;; Alternative 0 (noavx) is the two-operand form with operand 1 tied to
;; the destination; alternative 1 (avx) is the three-operand vmul form
;; carrying the mask and rounding substitutions.  The insn condition
;; additionally requires ix86_binary_operator_ok (MULT, ...).
be60ab96 | 1714 | (define_insn "*mul<mode>3<mask_name><round_name>" |
6a3f5f59 | 1715 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
27e5502d | 1716 | (mult:VF |
be60ab96 | 1717 | (match_operand:VF 1 "<round_nimm_predicate>" "%0,v") |
1718 | (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))] | |
1719 | "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
27e5502d | 1720 | "@ |
1721 | mul<ssemodesuffix>\t{%2, %0|%0, %2} | |
be60ab96 | 1722 | vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" |
27e5502d | 1723 | [(set_attr "isa" "noavx,avx") |
1724 | (set_attr "type" "ssemul") | |
5220cab6 | 1725 | (set_attr "prefix" "<mask_prefix3>") |
6470d004 | 1726 | (set_attr "btver2_decode" "direct,double") |
2a466fea | 1727 | (set_attr "mode" "<MODE>")]) |
5802c0cb | 1728 | |
;; Scalar-in-vector multiply/divide on VF_128 modes (multdiv iterator).
;; The result of the operation on operands 1 and 2 is vec_merged with
;; operand 1 (match_dup 1) under mask (const_int 1), so only the lowest
;; element takes the arithmetic result.  The "type" attribute is built
;; from the mnemonic ("sse<multdiv_mnemonic>").  Enabled under TARGET_SSE.
0b7cc9c6 | 1729 | (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>" |
e13e1b39 | 1730 | [(set (match_operand:VF_128 0 "register_operand" "=x,v") |
27e5502d | 1731 | (vec_merge:VF_128 |
1793b773 | 1732 | (multdiv:VF_128 |
e13e1b39 | 1733 | (match_operand:VF_128 1 "register_operand" "0,v") |
0b7cc9c6 | 1734 | (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>")) |
5802c0cb | 1735 | (match_dup 1) |
1736 | (const_int 1)))] | |
6fe5844b | 1737 | "TARGET_SSE" |
27e5502d | 1738 | "@ |
1793b773 | 1739 | <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} |
0b7cc9c6 | 1740 | v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}" |
27e5502d | 1741 | [(set_attr "isa" "noavx,avx") |
1793b773 | 1742 | (set_attr "type" "sse<multdiv_mnemonic>") |
0b7cc9c6 | 1743 | (set_attr "prefix" "<round_prefix>") |
1793b773 | 1744 | (set_attr "btver2_decode" "direct,double") |
2a466fea | 1745 | (set_attr "mode" "<ssescalarmode>")]) |
5802c0cb | 1746 | |
;; Expander for vector divide on the VF2 modes.  It only calls
;; ix86_fixup_binary_operands_no_copy (DIV, ...) and then emits the div
;; template.  Enabled under TARGET_SSE2.
27e5502d | 1747 | (define_expand "div<mode>3" |
abd4f58b | 1748 | [(set (match_operand:VF2 0 "register_operand") |
1749 | (div:VF2 (match_operand:VF2 1 "register_operand") | |
1750 | (match_operand:VF2 2 "nonimmediate_operand")))] | |
6fe5844b | 1751 | "TARGET_SSE2" |
27e5502d | 1752 | "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);") |
1753 | ||
;; Expander for vector divide on the VF1 modes.  After
;; ix86_fixup_binary_operands_no_copy (DIV, ...), the expansion is handed
;; to ix86_emit_swdivsf (followed by DONE) when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP_VEC_DIV, !optimize_insn_for_size_p (),
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  Otherwise the div template is emitted.
;; Enabled under TARGET_SSE.
1754 | (define_expand "div<mode>3" | |
abd4f58b | 1755 | [(set (match_operand:VF1 0 "register_operand") |
1756 | (div:VF1 (match_operand:VF1 1 "register_operand") | |
1757 | (match_operand:VF1 2 "nonimmediate_operand")))] | |
6fe5844b | 1758 | "TARGET_SSE" |
ed30e0a6 | 1759 | { |
27e5502d | 1760 | ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands); |
ed30e0a6 | 1761 | |
8acd1d98 | 1762 | if (TARGET_SSE_MATH |
1763 | && TARGET_RECIP_VEC_DIV | |
1764 | && !optimize_insn_for_size_p () | |
ed30e0a6 | 1765 | && flag_finite_math_only && !flag_trapping_math |
1766 | && flag_unsafe_math_optimizations) | |
1767 | { | |
27e5502d | 1768 | ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode); |
e174638f | 1769 | DONE; |
1770 | } | |
1771 | }) | |
5802c0cb | 1772 | |
;; Vector divide on VF modes.  Operand 1 must be a register.  Alternative
;; 0 (noavx) is the two-operand div form with operand 1 tied to the
;; destination; alternative 1 (avx) is the three-operand vdiv form
;; carrying the mask and rounding substitutions.  Enabled under
;; TARGET_SSE together with the mask/round 512-bit conditions.
be60ab96 | 1773 | (define_insn "<sse>_div<mode>3<mask_name><round_name>" |
e13e1b39 | 1774 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
27e5502d | 1775 | (div:VF |
e13e1b39 | 1776 | (match_operand:VF 1 "register_operand" "0,v") |
be60ab96 | 1777 | (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))] |
1778 | "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
27e5502d | 1779 | "@ |
1780 | div<ssemodesuffix>\t{%2, %0|%0, %2} | |
be60ab96 | 1781 | vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" |
27e5502d | 1782 | [(set_attr "isa" "noavx,avx") |
1783 | (set_attr "type" "ssediv") | |
5220cab6 | 1784 | (set_attr "prefix" "<mask_prefix3>") |
2a466fea | 1785 | (set_attr "mode" "<MODE>")]) |
5802c0cb | 1786 | |
;; UNSPEC_RCP on the VF1_128_256 modes: operand 1 (register or memory) is
;; the input and "x" register operand 0 the result, emitted as %vrcpps.
;; Enabled under TARGET_SSE.
27e5502d | 1787 | (define_insn "<sse>_rcp<mode>2" |
03ae25dc | 1788 | [(set (match_operand:VF1_128_256 0 "register_operand" "=x") |
1789 | (unspec:VF1_128_256 | |
1790 | [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] | |
6fe5844b | 1791 | "TARGET_SSE" |
ed30e0a6 | 1792 | "%vrcpps\t{%1, %0|%0, %1}" |
5802c0cb | 1793 | [(set_attr "type" "sse") |
fbfe006e | 1794 | (set_attr "atom_sse_attr" "rcp") |
6470d004 | 1795 | (set_attr "btver2_sse_attr" "rcp") |
ed30e0a6 | 1796 | (set_attr "prefix" "maybe_vex") |
27e5502d | 1797 | (set_attr "mode" "<MODE>")]) |
ed30e0a6 | 1798 | |
;; Scalar-in-vector UNSPEC_RCP on V4SF.  The unspec of operand 1 is
;; vec_merged with register operand 2 under mask (const_int 1), so only
;; the lowest element takes the unspec result and the others come from
;; operand 2.  Alternative 0 (noavx) ties operand 2 to the destination
;; and emits rcpss; alternative 1 (avx) emits the three-operand vrcpss.
;; Enabled under TARGET_SSE.
5802c0cb | 1799 | (define_insn "sse_vmrcpv4sf2" |
27e5502d | 1800 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
5802c0cb | 1801 | (vec_merge:V4SF |
27e5502d | 1802 | (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")] |
5802c0cb | 1803 | UNSPEC_RCP) |
27e5502d | 1804 | (match_operand:V4SF 2 "register_operand" "0,x") |
5802c0cb | 1805 | (const_int 1)))] |
1806 | "TARGET_SSE" | |
27e5502d | 1807 | "@ |
c358a059 | 1808 | rcpss\t{%1, %0|%0, %k1} |
1809 | vrcpss\t{%1, %2, %0|%0, %2, %k1}" | |
27e5502d | 1810 | [(set_attr "isa" "noavx,avx") |
1811 | (set_attr "type" "sse") | |
8c1dfa94 | 1812 | (set_attr "ssememalign" "32") |
fbfe006e | 1813 | (set_attr "atom_sse_attr" "rcp") |
6470d004 | 1814 | (set_attr "btver2_sse_attr" "rcp") |
27e5502d | 1815 | (set_attr "prefix" "orig,vex") |
5802c0cb | 1816 | (set_attr "mode" "SF")]) |
1817 | ||
;; UNSPEC_RCP14 on the VF_AVX512VL modes, with the mask variant.
;; Operand 1 (register or memory) is the input and "v" register operand 0
;; the result, emitted as vrcp14<ssemodesuffix>.  Enabled under
;; TARGET_AVX512F.
5220cab6 | 1818 | (define_insn "<mask_codefor>rcp14<mode><mask_name>" |
250533c0 | 1819 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") |
1820 | (unspec:VF_AVX512VL | |
1821 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")] | |
85065932 | 1822 | UNSPEC_RCP14))] |
1823 | "TARGET_AVX512F" | |
5220cab6 | 1824 | "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
85065932 | 1825 | [(set_attr "type" "sse") |
1826 | (set_attr "prefix" "evex") | |
1827 | (set_attr "mode" "<MODE>")]) | |
1828 | ||
;; Scalar-in-vector UNSPEC_RCP14 on the VF_128 modes.  The unspec of
;; operand 1 is vec_merged with register operand 2 under mask
;; (const_int 1), so only the lowest element takes the unspec result.
;; Emitted as the three-operand vrcp14<ssescalarmodesuffix>.  Enabled
;; under TARGET_AVX512F.
0b7cc9c6 | 1829 | (define_insn "srcp14<mode>" |
85065932 | 1830 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
1831 | (vec_merge:VF_128 | |
1832 | (unspec:VF_128 | |
fbf4df62 | 1833 | [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] |
85065932 | 1834 | UNSPEC_RCP14) |
fbf4df62 | 1835 | (match_operand:VF_128 2 "register_operand" "v") |
85065932 | 1836 | (const_int 1)))] |
1837 | "TARGET_AVX512F" | |
fbf4df62 | 1838 | "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" |
85065932 | 1839 | [(set_attr "type" "sse") |
1840 | (set_attr "prefix" "evex") | |
1841 | (set_attr "mode" "<MODE>")]) | |
1842 | ||
;; Expander for vector sqrt on the VF2 modes.  It has no preparation
;; statements, so the sqrt template is emitted as written.  Enabled
;; under TARGET_SSE2.
27e5502d | 1843 | (define_expand "sqrt<mode>2" |
abd4f58b | 1844 | [(set (match_operand:VF2 0 "register_operand") |
1845 | (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))] | |
6fe5844b | 1846 | "TARGET_SSE2") |
ed30e0a6 | 1847 | |
;; Expander for vector sqrt on the VF1 modes.  The expansion is handed to
;; ix86_emit_swsqrtsf (last argument false), followed by DONE, when all
;; of these hold: TARGET_SSE_MATH, TARGET_RECIP_VEC_SQRT,
;; !optimize_insn_for_size_p (), flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.  Otherwise the
;; sqrt template is emitted.  Enabled under TARGET_SSE.
27e5502d | 1848 | (define_expand "sqrt<mode>2" |
abd4f58b | 1849 | [(set (match_operand:VF1 0 "register_operand") |
1850 | (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))] | |
6fe5844b | 1851 | "TARGET_SSE" |
2a466fea | 1852 | { |
8acd1d98 | 1853 | if (TARGET_SSE_MATH |
1854 | && TARGET_RECIP_VEC_SQRT | |
1855 | && !optimize_insn_for_size_p () | |
2a466fea | 1856 | && flag_finite_math_only && !flag_trapping_math |
1857 | && flag_unsafe_math_optimizations) | |
1858 | { | |
27e5502d | 1859 | ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false); |
2a466fea | 1860 | DONE; |
1861 | } | |
1862 | }) | |
1863 | ||
;; Vector sqrt on VF modes, with the mask and round variants.  Operand 1
;; is the input and "v" register operand 0 the result, emitted as
;; %vsqrt<ssemodesuffix> with the mask/rounding substitutions.  Enabled
;; under TARGET_SSE together with the mask/round 512-bit conditions.
be60ab96 | 1864 | (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>" |
e13e1b39 | 1865 | [(set (match_operand:VF 0 "register_operand" "=v") |
be60ab96 | 1866 | (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))] |
1867 | "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
1868 | "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
2a466fea | 1869 | [(set_attr "type" "sse") |
fbfe006e | 1870 | (set_attr "atom_sse_attr" "sqrt") |
6470d004 | 1871 | (set_attr "btver2_sse_attr" "sqrt") |
ed30e0a6 | 1872 | (set_attr "prefix" "maybe_vex") |
27e5502d | 1873 | (set_attr "mode" "<MODE>")]) |
ed30e0a6 | 1874 | |
;; Scalar-in-vector sqrt on the VF_128 modes.  The sqrt of operand 1 is
;; vec_merged with register operand 2 under mask (const_int 1), so only
;; the lowest element takes the sqrt result and the others come from
;; operand 2.  Alternative 0 (noavx) ties operand 2 to the destination;
;; alternative 1 (avx) is the three-operand vsqrt form with the rounding
;; substitution.  Enabled under TARGET_SSE.
0b7cc9c6 | 1875 | (define_insn "<sse>_vmsqrt<mode>2<round_name>" |
e13e1b39 | 1876 | [(set (match_operand:VF_128 0 "register_operand" "=x,v") |
27e5502d | 1877 | (vec_merge:VF_128 |
1878 | (sqrt:VF_128 | |
0b7cc9c6 | 1879 | (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>")) |
e13e1b39 | 1880 | (match_operand:VF_128 2 "register_operand" "0,v") |
2a466fea | 1881 | (const_int 1)))] |
6fe5844b | 1882 | "TARGET_SSE" |
27e5502d | 1883 | "@ |
c358a059 | 1884 | sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1} |
0b7cc9c6 | 1885 | vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}" |
27e5502d | 1886 | [(set_attr "isa" "noavx,avx") |
1887 | (set_attr "type" "sse") | |
fbfe006e | 1888 | (set_attr "atom_sse_attr" "sqrt") |
0b7cc9c6 | 1889 | (set_attr "prefix" "<round_prefix>") |
5220cab6 | 1890 | (set_attr "btver2_sse_attr" "sqrt") |
2a466fea | 1891 | (set_attr "mode" "<ssescalarmode>")]) |
1892 | ||
;; Expander for "rsqrt<mode>2" on the VF1_128_256 modes.  It always calls
;; ix86_emit_swsqrtsf with the last argument true and then DONE, so the
;; UNSPEC_RSQRT template is never emitted by this expander itself.
;; Enabled under TARGET_SSE_MATH.
27e5502d | 1893 | (define_expand "rsqrt<mode>2" |
03ae25dc | 1894 | [(set (match_operand:VF1_128_256 0 "register_operand") |
1895 | (unspec:VF1_128_256 | |
1896 | [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))] | |
1a086819 | 1897 | "TARGET_SSE_MATH" |
e174638f | 1898 | { |
27e5502d | 1899 | ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true); |
1a086819 | 1900 | DONE; |
e174638f | 1901 | }) |
1902 | ||
;; UNSPEC_RSQRT on the VF1_128_256 modes: operand 1 (register or memory)
;; is the input and "x" register operand 0 the result, emitted as
;; %vrsqrtps.  Enabled under TARGET_SSE.
27e5502d | 1903 | (define_insn "<sse>_rsqrt<mode>2" |
03ae25dc | 1904 | [(set (match_operand:VF1_128_256 0 "register_operand" "=x") |
1905 | (unspec:VF1_128_256 | |
1906 | [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] | |
6fe5844b | 1907 | "TARGET_SSE" |
ed30e0a6 | 1908 | "%vrsqrtps\t{%1, %0|%0, %1}" |
d42cfd5f | 1909 | [(set_attr "type" "sse") |
ed30e0a6 | 1910 | (set_attr "prefix" "maybe_vex") |
27e5502d | 1911 | (set_attr "mode" "<MODE>")]) |
ed30e0a6 | 1912 | |
;; UNSPEC_RSQRT14 on the VF_AVX512VL modes, with the mask variant.
;; Operand 1 (register or memory) is the input and "v" register operand 0
;; the result, emitted as vrsqrt14<ssemodesuffix>.  Enabled under
;; TARGET_AVX512F.
5220cab6 | 1913 | (define_insn "<mask_codefor>rsqrt14<mode><mask_name>" |
250533c0 | 1914 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") |
1915 | (unspec:VF_AVX512VL | |
1916 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")] | |
85065932 | 1917 | UNSPEC_RSQRT14))] |
1918 | "TARGET_AVX512F" | |
5220cab6 | 1919 | "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
85065932 | 1920 | [(set_attr "type" "sse") |
1921 | (set_attr "prefix" "evex") | |
1922 | (set_attr "mode" "<MODE>")]) | |
1923 | ||
;; Scalar-in-vector UNSPEC_RSQRT14 on the VF_128 modes.  The unspec of
;; operand 1 is vec_merged with register operand 2 under mask
;; (const_int 1), so only the lowest element takes the unspec result.
;; Emitted as the three-operand vrsqrt14<ssescalarmodesuffix>.  Enabled
;; under TARGET_AVX512F.
0b7cc9c6 | 1924 | (define_insn "rsqrt14<mode>" |
85065932 | 1925 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
1926 | (vec_merge:VF_128 | |
1927 | (unspec:VF_128 | |
c4f782fd | 1928 | [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] |
85065932 | 1929 | UNSPEC_RSQRT14) |
c4f782fd | 1930 | (match_operand:VF_128 2 "register_operand" "v") |
85065932 | 1931 | (const_int 1)))] |
1932 | "TARGET_AVX512F" | |
c4f782fd | 1933 | "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" |
85065932 | 1934 | [(set_attr "type" "sse") |
1935 | (set_attr "prefix" "evex") | |
1936 | (set_attr "mode" "<MODE>")]) | |
1937 | ||
;; Scalar-in-vector UNSPEC_RSQRT on V4SF.  The unspec of operand 1 is
;; vec_merged with register operand 2 under mask (const_int 1), so only
;; the lowest element takes the unspec result and the others come from
;; operand 2.  Alternative 0 (noavx) ties operand 2 to the destination
;; and emits rsqrtss; alternative 1 (avx) emits the three-operand
;; vrsqrtss.  Enabled under TARGET_SSE.
5802c0cb | 1938 | (define_insn "sse_vmrsqrtv4sf2" |
27e5502d | 1939 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
5802c0cb | 1940 | (vec_merge:V4SF |
27e5502d | 1941 | (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")] |
5802c0cb | 1942 | UNSPEC_RSQRT) |
27e5502d | 1943 | (match_operand:V4SF 2 "register_operand" "0,x") |
5802c0cb | 1944 | (const_int 1)))] |
1945 | "TARGET_SSE" | |
27e5502d | 1946 | "@ |
c358a059 | 1947 | rsqrtss\t{%1, %0|%0, %k1} |
1948 | vrsqrtss\t{%1, %2, %0|%0, %2, %k1}" | |
27e5502d | 1949 | [(set_attr "isa" "noavx,avx") |
1950 | (set_attr "type" "sse") | |
8c1dfa94 | 1951 | (set_attr "ssememalign" "32") |
27e5502d | 1952 | (set_attr "prefix" "orig,vex") |
5802c0cb | 1953 | (set_attr "mode" "SF")]) |
1954 | ||
79eddd43 | 1955 | ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX |
009b318f | 1956 | ;; isn't really correct, as those rtl operators aren't defined when |
79eddd43 | 1957 | ;; applied to NaNs. Hopefully the optimizers won't get too smart on us. |
1958 | ||
;; Expander for the smaxmin codes on VF modes, with the mask and
;; round_saeonly variants.  When !flag_finite_math_only, operand 1 is
;; first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is applied and the smaxmin template
;; is emitted (there is no DONE).
dbfe84d5 | 1959 | (define_expand "<code><mode>3<mask_name><round_saeonly_name>" |
abd4f58b | 1960 | [(set (match_operand:VF 0 "register_operand") |
27e5502d | 1961 | (smaxmin:VF |
dbfe84d5 | 1962 | (match_operand:VF 1 "<round_saeonly_nimm_predicate>") |
1963 | (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))] | |
1964 | "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" | |
79eddd43 | 1965 | { |
1966 | if (!flag_finite_math_only) | |
2a466fea | 1967 | operands[1] = force_reg (<MODE>mode, operands[1]); |
9409fce7 | 1968 | ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); |
79eddd43 | 1969 | }) |
5802c0cb | 1970 | |
;; smaxmin on VF modes for the flag_finite_math_only case.  Operand 1 is
;; marked commutative ("%") and may be a non-register operand as allowed
;; by the predicate; the insn condition also requires
;; ix86_binary_operator_ok.  Alternative 0 is the noavx two-operand form,
;; alternative 1 the avx three-operand form with mask/SAE substitutions.
dbfe84d5 | 1971 | (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>" |
e13e1b39 | 1972 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
27e5502d | 1973 | (smaxmin:VF |
dbfe84d5 | 1974 | (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v") |
1975 | (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))] | |
6fe5844b | 1976 | "TARGET_SSE && flag_finite_math_only |
5220cab6 | 1977 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) |
dbfe84d5 | 1978 | && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" |
27e5502d | 1979 | "@ |
1980 | <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} | |
dbfe84d5 | 1981 | v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}" |
27e5502d | 1982 | [(set_attr "isa" "noavx,avx") |
1983 | (set_attr "type" "sseadd") | |
6470d004 | 1984 | (set_attr "btver2_sse_attr" "maxmin") |
5220cab6 | 1985 | (set_attr "prefix" "<mask_prefix3>") |
2a466fea | 1986 | (set_attr "mode" "<MODE>")]) |
5802c0cb | 1987 | |
;; smaxmin on VF modes for the !flag_finite_math_only case.  Unlike the
;; finite variant, operand 1 must be a register and carries no
;; commutative marker, so the operand order is preserved.  Alternative 0
;; is the noavx two-operand form, alternative 1 the avx three-operand
;; form with mask/SAE substitutions.
dbfe84d5 | 1988 | (define_insn "*<code><mode>3<mask_name><round_saeonly_name>" |
e13e1b39 | 1989 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
27e5502d | 1990 | (smaxmin:VF |
e13e1b39 | 1991 | (match_operand:VF 1 "register_operand" "0,v") |
dbfe84d5 | 1992 | (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))] |
5220cab6 | 1993 | "TARGET_SSE && !flag_finite_math_only |
dbfe84d5 | 1994 | && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" |
27e5502d | 1995 | "@ |
1996 | <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} | |
dbfe84d5 | 1997 | v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}" |
27e5502d | 1998 | [(set_attr "isa" "noavx,avx") |
1999 | (set_attr "type" "sseadd") | |
6470d004 | 2000 | (set_attr "btver2_sse_attr" "maxmin") |
5220cab6 | 2001 | (set_attr "prefix" "<mask_prefix3>") |
2a466fea | 2002 | (set_attr "mode" "<MODE>")]) |
79eddd43 | 2003 | |
;; Scalar-in-vector smaxmin on the VF_128 modes.  The smaxmin of operands
;; 1 and 2 is vec_merged with operand 1 (match_dup 1) under mask
;; (const_int 1), so only the lowest element takes the min/max result.
;; Alternative 0 is the noavx two-operand form, alternative 1 the avx
;; three-operand form with the SAE substitution.  Enabled under
;; TARGET_SSE.
0b7cc9c6 | 2004 | (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>" |
e13e1b39 | 2005 | [(set (match_operand:VF_128 0 "register_operand" "=x,v") |
27e5502d | 2006 | (vec_merge:VF_128 |
2007 | (smaxmin:VF_128 | |
e13e1b39 | 2008 | (match_operand:VF_128 1 "register_operand" "0,v") |
0b7cc9c6 | 2009 | (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>")) |
79eddd43 | 2010 | (match_dup 1) |
2011 | (const_int 1)))] | |
6fe5844b | 2012 | "TARGET_SSE" |
27e5502d | 2013 | "@ |
c358a059 | 2014 | <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} |
0b7cc9c6 | 2015 | v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}" |
27e5502d | 2016 | [(set_attr "isa" "noavx,avx") |
2017 | (set_attr "type" "sse") | |
6470d004 | 2018 | (set_attr "btver2_sse_attr" "maxmin") |
0b7cc9c6 | 2019 | (set_attr "prefix" "<round_saeonly_prefix>") |
2a466fea | 2020 | (set_attr "mode" "<ssescalarmode>")]) |
5802c0cb | 2021 | |
6cab81bd | 2022 | ;; These versions of the min/max patterns implement exactly the operations |
2023 | ;; min = (op1 < op2 ? op1 : op2) | |
2024 | ;; max = (!(op1 < op2) ? op1 : op2) | |
2025 | ;; Their operands are not commutative, and thus they may be used in the | |
2026 | ;; presence of -0.0 and NaN. | |
2027 | ||
;; UNSPEC_IEEE_MIN on VF modes: an unspec of register operand 1 and
;; register-or-memory operand 2, in that fixed order.  Alternative 0
;; (noavx) ties operand 1 to the destination and emits min; alternative 1
;; (avx) emits the three-operand vmin.  Enabled under TARGET_SSE.
2a466fea | 2028 | (define_insn "*ieee_smin<mode>3" |
6a3f5f59 | 2029 | [(set (match_operand:VF 0 "register_operand" "=v,v") |
27e5502d | 2030 | (unspec:VF |
6a3f5f59 | 2031 | [(match_operand:VF 1 "register_operand" "0,v") |
2032 | (match_operand:VF 2 "nonimmediate_operand" "vm,vm")] | |
2a466fea | 2033 | UNSPEC_IEEE_MIN))] |
6fe5844b | 2034 | "TARGET_SSE" |
27e5502d | 2035 | "@ |
8ba20934 | 2036 | min<ssemodesuffix>\t{%2, %0|%0, %2} |
2037 | vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
27e5502d | 2038 | [(set_attr "isa" "noavx,avx") |
2039 | (set_attr "type" "sseadd") | |
2040 | (set_attr "prefix" "orig,vex") | |
2a466fea | 2041 | (set_attr "mode" "<MODE>")]) |
6cab81bd | 2042 | |
;; UNSPEC_IEEE_MAX on VF modes: an unspec of register operand 1 and
;; register-or-memory operand 2, in that fixed order.  Alternative 0
;; (noavx) ties operand 1 to the destination and emits max; alternative 1
;; (avx) emits the three-operand vmax.  Enabled under TARGET_SSE.
2a466fea | 2043 | (define_insn "*ieee_smax<mode>3" |
6a3f5f59 | 2044 | [(set (match_operand:VF 0 "register_operand" "=v,v") |
27e5502d | 2045 | (unspec:VF |
6a3f5f59 | 2046 | [(match_operand:VF 1 "register_operand" "0,v") |
2047 | (match_operand:VF 2 "nonimmediate_operand" "vm,vm")] | |
2a466fea | 2048 | UNSPEC_IEEE_MAX))] |
6fe5844b | 2049 | "TARGET_SSE" |
27e5502d | 2050 | "@ |
8ba20934 | 2051 | max<ssemodesuffix>\t{%2, %0|%0, %2} |
2052 | vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
27e5502d | 2053 | [(set_attr "isa" "noavx,avx") |
2054 | (set_attr "type" "sseadd") | |
2055 | (set_attr "prefix" "orig,vex") | |
2a466fea | 2056 | (set_attr "mode" "<MODE>")]) |
6cab81bd | 2057 | |
;; V4DF add/subtract: a vec_merge of (minus op1 op2) and (plus op1 op2)
;; under mask (const_int 5), i.e. binary 0101 -- elements 0 and 2 come
;; from the subtraction and elements 1 and 3 from the addition.  Emitted
;; as vaddsubpd.  Enabled under TARGET_AVX.
ed30e0a6 | 2058 | (define_insn "avx_addsubv4df3" |
2059 | [(set (match_operand:V4DF 0 "register_operand" "=x") | |
2060 | (vec_merge:V4DF | |
06af5c80 | 2061 | (minus:V4DF |
ed30e0a6 | 2062 | (match_operand:V4DF 1 "register_operand" "x") |
2063 | (match_operand:V4DF 2 "nonimmediate_operand" "xm")) | |
06af5c80 | 2064 | (plus:V4DF (match_dup 1) (match_dup 2)) |
2065 | (const_int 5)))] | |
6c197bf1 | 2066 | "TARGET_AVX" |
2067 | "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" | |
2068 | [(set_attr "type" "sseadd") | |
2069 | (set_attr "prefix" "vex") | |
2070 | (set_attr "mode" "V4DF")]) | |
2071 | ||
;; V2DF add/subtract: a vec_merge of (minus op1 op2) and (plus op1 op2)
;; under mask (const_int 1) -- element 0 comes from the subtraction and
;; element 1 from the addition.  Alternative 0 (noavx) ties operand 1 to
;; the destination and emits addsubpd; alternative 1 (avx) emits the
;; three-operand vaddsubpd.  Enabled under TARGET_SSE3.
27e5502d | 2072 | (define_insn "sse3_addsubv2df3" |
2073 | [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
2074 | (vec_merge:V2DF | |
06af5c80 | 2075 | (minus:V2DF |
27e5502d | 2076 | (match_operand:V2DF 1 "register_operand" "0,x") |
2077 | (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) | |
06af5c80 | 2078 | (plus:V2DF (match_dup 1) (match_dup 2)) |
2079 | (const_int 1)))] | |
6c197bf1 | 2080 | "TARGET_SSE3" |
2081 | "@ | |
2082 | addsubpd\t{%2, %0|%0, %2} | |
2083 | vaddsubpd\t{%2, %1, %0|%0, %1, %2}" | |
2084 | [(set_attr "isa" "noavx,avx") | |
2085 | (set_attr "type" "sseadd") | |
2086 | (set_attr "atom_unit" "complex") | |
2087 | (set_attr "prefix" "orig,vex") | |
2088 | (set_attr "mode" "V2DF")]) | |
2089 | ||
;; V8SF add/subtract: a vec_merge of (minus op1 op2) and (plus op1 op2)
;; under mask (const_int 85), i.e. binary 01010101 -- even elements come
;; from the subtraction and odd elements from the addition.  Emitted as
;; vaddsubps.  Enabled under TARGET_AVX.
27e5502d | 2090 | (define_insn "avx_addsubv8sf3" |
2091 | [(set (match_operand:V8SF 0 "register_operand" "=x") | |
2092 | (vec_merge:V8SF | |
06af5c80 | 2093 | (minus:V8SF |
27e5502d | 2094 | (match_operand:V8SF 1 "register_operand" "x") |
2095 | (match_operand:V8SF 2 "nonimmediate_operand" "xm")) | |
06af5c80 | 2096 | (plus:V8SF (match_dup 1) (match_dup 2)) |
2097 | (const_int 85)))] | |
6c197bf1 | 2098 | "TARGET_AVX" |
2099 | "vaddsubps\t{%2, %1, %0|%0, %1, %2}" | |
2100 | [(set_attr "type" "sseadd") | |
2101 | (set_attr "prefix" "vex") | |
2102 | (set_attr "mode" "V8SF")]) | |
2103 | ||
;; V4SF add/subtract: a vec_merge of (minus op1 op2) and (plus op1 op2)
;; under mask (const_int 5), i.e. binary 0101 -- elements 0 and 2 come
;; from the subtraction and elements 1 and 3 from the addition.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; addsubps; alternative 1 (avx) emits the three-operand vaddsubps.
;; Enabled under TARGET_SSE3.
5802c0cb | 2104 | (define_insn "sse3_addsubv4sf3" |
27e5502d | 2105 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
5802c0cb | 2106 | (vec_merge:V4SF |
06af5c80 | 2107 | (minus:V4SF |
27e5502d | 2108 | (match_operand:V4SF 1 "register_operand" "0,x") |
2109 | (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) | |
06af5c80 | 2110 | (plus:V4SF (match_dup 1) (match_dup 2)) |
2111 | (const_int 5)))] | |
5802c0cb | 2112 | "TARGET_SSE3" |
27e5502d | 2113 | "@ |
2114 | addsubps\t{%2, %0|%0, %2} | |
2115 | vaddsubps\t{%2, %1, %0|%0, %1, %2}" | |
2116 | [(set_attr "isa" "noavx,avx") | |
2117 | (set_attr "type" "sseadd") | |
6c197bf1 | 2118 | (set_attr "prefix" "orig,vex") |
2119 | (set_attr "prefix_rep" "1,*") | |
2120 | (set_attr "mode" "V4SF")]) | |
2121 | ||
;; Split for an addsub_vm_operator whose first arm is (minus op1 op2) and
;; second arm is (plus op3 op4), with a constant mask operand 5.  It
;; applies under TARGET_SSE3 while pseudos can still be created, and only
;; when the plus operands are the same as the minus operands in either
;; order.  The replacement is the canonical vec_merge of
;; (minus op1 op2) and (plus op1 op2) with the original mask.
06af5c80 | 2122 | (define_split |
2123 | [(set (match_operand:VF_128_256 0 "register_operand") | |
2124 | (match_operator:VF_128_256 6 "addsub_vm_operator" | |
2125 | [(minus:VF_128_256 | |
2126 | (match_operand:VF_128_256 1 "register_operand") | |
2127 | (match_operand:VF_128_256 2 "nonimmediate_operand")) | |
2128 | (plus:VF_128_256 | |
2129 | (match_operand:VF_128_256 3 "nonimmediate_operand") | |
2130 | (match_operand:VF_128_256 4 "nonimmediate_operand")) | |
2131 | (match_operand 5 "const_int_operand")]))] | |
2132 | "TARGET_SSE3 | |
2133 | && can_create_pseudo_p () | |
2134 | && ((rtx_equal_p (operands[1], operands[3]) | |
2135 | && rtx_equal_p (operands[2], operands[4])) | |
2136 | || (rtx_equal_p (operands[1], operands[4]) | |
2137 | && rtx_equal_p (operands[2], operands[3])))" | |
2138 | [(set (match_dup 0) | |
2139 | (vec_merge:VF_128_256 | |
2140 | (minus:VF_128_256 (match_dup 1) (match_dup 2)) | |
2141 | (plus:VF_128_256 (match_dup 1) (match_dup 2)) | |
2142 | (match_dup 5)))]) | |
6c197bf1 | 2143 | |
;; Split for an addsub_vm_operator whose first arm is (plus op1 op2) and
;; second arm is (minus op3 op4), with a constant mask operand 5.  It
;; applies under TARGET_SSE3 while pseudos can still be created, and only
;; when the plus operands are the same as the minus operands in either
;; order.  The replacement puts the minus first, built from the minus
;; operands 3 and 4, so the preparation code replaces operand 5 with its
;; bitwise complement, truncated to GET_MODE_NUNITS (<MODE>mode) bits.
06af5c80 | 2144 | (define_split |
2145 | [(set (match_operand:VF_128_256 0 "register_operand") | |
2146 | (match_operator:VF_128_256 6 "addsub_vm_operator" | |
2147 | [(plus:VF_128_256 | |
2148 | (match_operand:VF_128_256 1 "nonimmediate_operand") | |
2149 | (match_operand:VF_128_256 2 "nonimmediate_operand")) | |
2150 | (minus:VF_128_256 | |
2151 | (match_operand:VF_128_256 3 "register_operand") | |
2152 | (match_operand:VF_128_256 4 "nonimmediate_operand")) | |
2153 | (match_operand 5 "const_int_operand")]))] | |
2154 | "TARGET_SSE3 | |
2155 | && can_create_pseudo_p () | |
2156 | && ((rtx_equal_p (operands[1], operands[3]) | |
2157 | && rtx_equal_p (operands[2], operands[4])) | |
2158 | || (rtx_equal_p (operands[1], operands[4]) | |
2159 | && rtx_equal_p (operands[2], operands[3])))" | |
2160 | [(set (match_dup 0) | |
2161 | (vec_merge:VF_128_256 | |
2162 | (minus:VF_128_256 (match_dup 3) (match_dup 4)) | |
2163 | (plus:VF_128_256 (match_dup 3) (match_dup 4)) | |
2164 | (match_dup 5)))] | |
2165 | { | |
2166 | /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */ | |
2167 | operands[5] | |
2168 | = GEN_INT (~INTVAL (operands[5]) | |
2169 | & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1)); | |
2170 | }) | |
2171 | ||
;; Split: rewrite an "addsub_vs_operator" expression (presumably a
;; vec_select; the predicate is defined elsewhere) applied to
;;   (vec_concat (minus op1 op2) (plus op3 op4))
;; with an "addsub_vs_parallel" selector into
;;   (vec_merge (minus op1 op2) (plus op1 op2) mask).
;; The preparation code walks the selector parallel (operand 5) and sets bit
;; I of the mask when selector element I is below GET_MODE_NUNITS, i.e. when
;; it indexes the first (MINUS) half of the concatenation; operand 5 is then
;; replaced by that integer constant.
;; The condition requires SSE3, that new pseudos may still be created, and
;; that operands (3, 4) equal operands (1, 2) in either order.
2172 | (define_split | |
2173 | [(set (match_operand:VF_128_256 0 "register_operand") | |
2174 | (match_operator:VF_128_256 7 "addsub_vs_operator" | |
2175 | [(vec_concat:<ssedoublemode> | |
2176 | (minus:VF_128_256 | |
2177 | (match_operand:VF_128_256 1 "register_operand") | |
2178 | (match_operand:VF_128_256 2 "nonimmediate_operand")) | |
2179 | (plus:VF_128_256 | |
2180 | (match_operand:VF_128_256 3 "nonimmediate_operand") | |
2181 | (match_operand:VF_128_256 4 "nonimmediate_operand"))) | |
2182 | (match_parallel 5 "addsub_vs_parallel" | |
2183 | [(match_operand 6 "const_int_operand")])]))] | |
2184 | "TARGET_SSE3 | |
2185 | && can_create_pseudo_p () | |
2186 | && ((rtx_equal_p (operands[1], operands[3]) | |
2187 | && rtx_equal_p (operands[2], operands[4])) | |
2188 | || (rtx_equal_p (operands[1], operands[4]) | |
2189 | && rtx_equal_p (operands[2], operands[3])))" | |
2190 | [(set (match_dup 0) | |
2191 | (vec_merge:VF_128_256 | |
2192 | (minus:VF_128_256 (match_dup 1) (match_dup 2)) | |
2193 | (plus:VF_128_256 (match_dup 1) (match_dup 2)) | |
2194 | (match_dup 5)))] | |
2195 | { | |
2196 | int i, nelt = XVECLEN (operands[5], 0); | |
2197 | HOST_WIDE_INT ival = 0; | |
2198 | ||
2199 | for (i = 0; i < nelt; i++) | |
2200 | if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode)) | |
2201 | ival |= HOST_WIDE_INT_1 << i; | |
2202 | ||
2203 | operands[5] = GEN_INT (ival); | |
2204 | }) | |
2205 | ||
;; Split: rewrite an "addsub_vs_operator" expression (presumably a
;; vec_select; the predicate is defined elsewhere) applied to
;;   (vec_concat (plus op1 op2) (minus op3 op4))
;; with an "addsub_vs_parallel" selector into
;;   (vec_merge (minus op3 op4) (plus op3 op4) mask).
;; The replacement is built from operands 3 and 4 (the MINUS arm).  The
;; preparation code sets bit I of the mask when selector element I is
;; greater than or equal to GET_MODE_NUNITS, i.e. when it indexes the second
;; (MINUS) half of the concatenation; operand 5 is then replaced by that
;; integer constant.
;; The condition requires SSE3, that new pseudos may still be created, and
;; that operands (1, 2) equal operands (3, 4) in either order.
2206 | (define_split | |
2207 | [(set (match_operand:VF_128_256 0 "register_operand") | |
2208 | (match_operator:VF_128_256 7 "addsub_vs_operator" | |
2209 | [(vec_concat:<ssedoublemode> | |
2210 | (plus:VF_128_256 | |
2211 | (match_operand:VF_128_256 1 "nonimmediate_operand") | |
2212 | (match_operand:VF_128_256 2 "nonimmediate_operand")) | |
2213 | (minus:VF_128_256 | |
2214 | (match_operand:VF_128_256 3 "register_operand") | |
2215 | (match_operand:VF_128_256 4 "nonimmediate_operand"))) | |
2216 | (match_parallel 5 "addsub_vs_parallel" | |
2217 | [(match_operand 6 "const_int_operand")])]))] | |
2218 | "TARGET_SSE3 | |
2219 | && can_create_pseudo_p () | |
2220 | && ((rtx_equal_p (operands[1], operands[3]) | |
2221 | && rtx_equal_p (operands[2], operands[4])) | |
2222 | || (rtx_equal_p (operands[1], operands[4]) | |
2223 | && rtx_equal_p (operands[2], operands[3])))" | |
2224 | [(set (match_dup 0) | |
2225 | (vec_merge:VF_128_256 | |
2226 | (minus:VF_128_256 (match_dup 3) (match_dup 4)) | |
2227 | (plus:VF_128_256 (match_dup 3) (match_dup 4)) | |
2228 | (match_dup 5)))] | |
2229 | { | |
2230 | int i, nelt = XVECLEN (operands[5], 0); | |
2231 | HOST_WIDE_INT ival = 0; | |
2232 | ||
2233 | for (i = 0; i < nelt; i++) | |
2234 | if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode)) | |
2235 | ival |= HOST_WIDE_INT_1 << i; | |
2236 | ||
2237 | operands[5] = GEN_INT (ival); | |
2238 | }) | |
5802c0cb | 2239 | |
;; AVX horizontal add/subtract of four doubles (vh<plusminus_mnemonic>pd).
;; With OP being the plusminus code, the result elements are, in order:
;;   op1[0] OP op1[1], op2[0] OP op2[1], op1[2] OP op1[3], op2[2] OP op2[3].
;; Operand 1 must be a register; operand 2 may be a register or memory.
ed30e0a6 | 2240 | (define_insn "avx_h<plusminus_insn>v4df3" |
2241 | [(set (match_operand:V4DF 0 "register_operand" "=x") | |
2242 | (vec_concat:V4DF | |
2243 | (vec_concat:V2DF | |
2244 | (plusminus:DF | |
2245 | (vec_select:DF | |
2246 | (match_operand:V4DF 1 "register_operand" "x") | |
2247 | (parallel [(const_int 0)])) | |
2248 | (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) | |
ed30e0a6 | 2249 | (plusminus:DF |
2250 | (vec_select:DF | |
2251 | (match_operand:V4DF 2 "nonimmediate_operand" "xm") | |
2252 | (parallel [(const_int 0)])) | |
80f058cd | 2253 | (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))) |
2254 | (vec_concat:V2DF | |
2255 | (plusminus:DF | |
2256 | (vec_select:DF (match_dup 1) (parallel [(const_int 2)])) | |
2257 | (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))) | |
ed30e0a6 | 2258 | (plusminus:DF |
2259 | (vec_select:DF (match_dup 2) (parallel [(const_int 2)])) | |
2260 | (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))] | |
2261 | "TARGET_AVX" | |
2262 | "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}" | |
2263 | [(set_attr "type" "sseadd") | |
2264 | (set_attr "prefix" "vex") | |
2265 | (set_attr "mode" "V4DF")]) | |
2266 | ||
;; Named expander for the SSE3 V2DF horizontal add.  It has no preparation
;; code, so it emits the pattern as written:
;;   result = { op1[0] + op1[1], op2[0] + op2[1] }.
;; The element indices are the fixed constants 0 and 1.
6095368e | 2267 | (define_expand "sse3_haddv2df3" |
2268 | [(set (match_operand:V2DF 0 "register_operand") | |
2269 | (vec_concat:V2DF | |
2270 | (plus:DF | |
2271 | (vec_select:DF | |
2272 | (match_operand:V2DF 1 "register_operand") | |
2273 | (parallel [(const_int 0)])) | |
2274 | (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) | |
2275 | (plus:DF | |
2276 | (vec_select:DF | |
2277 | (match_operand:V2DF 2 "nonimmediate_operand") | |
2278 | (parallel [(const_int 0)])) | |
2279 | (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] | |
2280 | "TARGET_SSE3") | |
2281 | ||
;; haddpd / vhaddpd matcher.  The element indices of each PLUS are matched
;; as operands 3..6 rather than as fixed constants; the insn condition only
;; requires that the two indices within each PLUS differ, so both index
;; orders of each sum are accepted.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) uses the three-operand VEX form.
2282 | (define_insn "*sse3_haddv2df3" | |
27e5502d | 2283 | [(set (match_operand:V2DF 0 "register_operand" "=x,x") |
2284 | (vec_concat:V2DF | |
6095368e | 2285 | (plus:DF |
2286 | (vec_select:DF | |
2287 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2288 | (parallel [(match_operand:SI 3 "const_0_to_1_operand")])) | |
2289 | (vec_select:DF | |
2290 | (match_dup 1) | |
2291 | (parallel [(match_operand:SI 4 "const_0_to_1_operand")]))) | |
2292 | (plus:DF | |
2293 | (vec_select:DF | |
2294 | (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") | |
2295 | (parallel [(match_operand:SI 5 "const_0_to_1_operand")])) | |
2296 | (vec_select:DF | |
2297 | (match_dup 2) | |
2298 | (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))] | |
2299 | "TARGET_SSE3 | |
2300 | && INTVAL (operands[3]) != INTVAL (operands[4]) | |
2301 | && INTVAL (operands[5]) != INTVAL (operands[6])" | |
2302 | "@ | |
2303 | haddpd\t{%2, %0|%0, %2} | |
2304 | vhaddpd\t{%2, %1, %0|%0, %1, %2}" | |
2305 | [(set_attr "isa" "noavx,avx") | |
2306 | (set_attr "type" "sseadd") | |
2307 | (set_attr "prefix" "orig,vex") | |
2308 | (set_attr "mode" "V2DF")]) | |
2309 | ||
;; hsubpd / vhsubpd: result = { op1[0] - op1[1], op2[0] - op2[1] }.
;; The element indices are fixed constants (element 0 minus element 1).
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) uses the three-operand VEX form.
2310 | (define_insn "sse3_hsubv2df3" | |
2311 | [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
2312 | (vec_concat:V2DF | |
2313 | (minus:DF | |
27e5502d | 2314 | (vec_select:DF |
2315 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2316 | (parallel [(const_int 0)])) | |
2317 | (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) | |
6095368e | 2318 | (minus:DF |
27e5502d | 2319 | (vec_select:DF |
2320 | (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") | |
2321 | (parallel [(const_int 0)])) | |
2322 | (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] | |
2323 | "TARGET_SSE3" | |
2324 | "@ | |
6095368e | 2325 | hsubpd\t{%2, %0|%0, %2} |
2326 | vhsubpd\t{%2, %1, %0|%0, %1, %2}" | |
27e5502d | 2327 | [(set_attr "isa" "noavx,avx") |
2328 | (set_attr "type" "sseadd") | |
2329 | (set_attr "prefix" "orig,vex") | |
2330 | (set_attr "mode" "V2DF")]) | |
2331 | ||
;; Scalar DF result equal to the sum of the two elements of V2DF operand 1.
;; The element indices are matched as operands 2 and 3 and only required to
;; differ.  The instruction is emitted with the same register as both
;; sources (haddpd %0, %0 / vhaddpd %1, %1, %0); alternative 0 (noavx) ties
;; operand 1 to the destination.
6095368e | 2332 | (define_insn "*sse3_haddv2df3_low" |
2333 | [(set (match_operand:DF 0 "register_operand" "=x,x") | |
2334 | (plus:DF | |
2335 | (vec_select:DF | |
2336 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2337 | (parallel [(match_operand:SI 2 "const_0_to_1_operand")])) | |
2338 | (vec_select:DF | |
2339 | (match_dup 1) | |
2340 | (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))] | |
2341 | "TARGET_SSE3 | |
2342 | && INTVAL (operands[2]) != INTVAL (operands[3])" | |
2343 | "@ | |
2344 | haddpd\t{%0, %0|%0, %0} | |
2345 | vhaddpd\t{%1, %1, %0|%0, %1, %1}" | |
2346 | [(set_attr "isa" "noavx,avx") | |
2347 | (set_attr "type" "sseadd1") | |
2348 | (set_attr "prefix" "orig,vex") | |
2349 | (set_attr "mode" "V2DF")]) | |
2350 | ||
;; Scalar DF result equal to op1[0] - op1[1] for V2DF operand 1.  The
;; element indices are fixed constants.  The instruction is emitted with the
;; same register as both sources (hsubpd %0, %0 / vhsubpd %1, %1, %0);
;; alternative 0 (noavx) ties operand 1 to the destination.
2351 | (define_insn "*sse3_hsubv2df3_low" | |
2352 | [(set (match_operand:DF 0 "register_operand" "=x,x") | |
2353 | (minus:DF | |
2354 | (vec_select:DF | |
2355 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2356 | (parallel [(const_int 0)])) | |
2357 | (vec_select:DF | |
2358 | (match_dup 1) | |
2359 | (parallel [(const_int 1)]))))] | |
2360 | "TARGET_SSE3" | |
2361 | "@ | |
2362 | hsubpd\t{%0, %0|%0, %0} | |
2363 | vhsubpd\t{%1, %1, %0|%0, %1, %1}" | |
2364 | [(set_attr "isa" "noavx,avx") | |
2365 | (set_attr "type" "sseadd1") | |
2366 | (set_attr "prefix" "orig,vex") | |
2367 | (set_attr "mode" "V2DF")]) | |
2368 | ||
;; AVX horizontal add/subtract of eight floats (vh<plusminus_mnemonic>ps).
;; With OP being the plusminus code, the result elements are, in order:
;;   op1[0] OP op1[1], op1[2] OP op1[3], op2[0] OP op2[1], op2[2] OP op2[3],
;;   op1[4] OP op1[5], op1[6] OP op1[7], op2[4] OP op2[5], op2[6] OP op2[7].
;; Operand 1 must be a register; operand 2 may be a register or memory.
ed30e0a6 | 2369 | (define_insn "avx_h<plusminus_insn>v8sf3" |
2370 | [(set (match_operand:V8SF 0 "register_operand" "=x") | |
2371 | (vec_concat:V8SF | |
2372 | (vec_concat:V4SF | |
2373 | (vec_concat:V2SF | |
2374 | (plusminus:SF | |
2375 | (vec_select:SF | |
2376 | (match_operand:V8SF 1 "register_operand" "x") | |
2377 | (parallel [(const_int 0)])) | |
2378 | (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) | |
2379 | (plusminus:SF | |
2380 | (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) | |
2381 | (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) | |
2382 | (vec_concat:V2SF | |
2383 | (plusminus:SF | |
2384 | (vec_select:SF | |
2385 | (match_operand:V8SF 2 "nonimmediate_operand" "xm") | |
2386 | (parallel [(const_int 0)])) | |
2387 | (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) | |
2388 | (plusminus:SF | |
2389 | (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) | |
2390 | (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))) | |
2391 | (vec_concat:V4SF | |
2392 | (vec_concat:V2SF | |
2393 | (plusminus:SF | |
2394 | (vec_select:SF (match_dup 1) (parallel [(const_int 4)])) | |
2395 | (vec_select:SF (match_dup 1) (parallel [(const_int 5)]))) | |
2396 | (plusminus:SF | |
2397 | (vec_select:SF (match_dup 1) (parallel [(const_int 6)])) | |
2398 | (vec_select:SF (match_dup 1) (parallel [(const_int 7)])))) | |
2399 | (vec_concat:V2SF | |
2400 | (plusminus:SF | |
2401 | (vec_select:SF (match_dup 2) (parallel [(const_int 4)])) | |
2402 | (vec_select:SF (match_dup 2) (parallel [(const_int 5)]))) | |
2403 | (plusminus:SF | |
2404 | (vec_select:SF (match_dup 2) (parallel [(const_int 6)])) | |
2405 | (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))] | |
2406 | "TARGET_AVX" | |
2407 | "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}" | |
2408 | [(set_attr "type" "sseadd") | |
2409 | (set_attr "prefix" "vex") | |
2410 | (set_attr "mode" "V8SF")]) | |
2411 | ||
;; SSE3 horizontal add/subtract of four floats
;; (h<plusminus_mnemonic>ps / vh<plusminus_mnemonic>ps).
;; With OP being the plusminus code, the result elements are, in order:
;;   op1[0] OP op1[1], op1[2] OP op1[3], op2[0] OP op2[1], op2[2] OP op2[3].
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) uses the three-operand VEX form.
801ff5b2 | 2412 | (define_insn "sse3_h<plusminus_insn>v4sf3" |
27e5502d | 2413 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
2a466fea | 2414 | (vec_concat:V4SF |
2415 | (vec_concat:V2SF | |
9409fce7 | 2416 | (plusminus:SF |
2a466fea | 2417 | (vec_select:SF |
27e5502d | 2418 | (match_operand:V4SF 1 "register_operand" "0,x") |
2a466fea | 2419 | (parallel [(const_int 0)])) |
2420 | (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) | |
9409fce7 | 2421 | (plusminus:SF |
5802c0cb | 2422 | (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) |
2423 | (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) | |
2424 | (vec_concat:V2SF | |
9409fce7 | 2425 | (plusminus:SF |
5802c0cb | 2426 | (vec_select:SF |
27e5502d | 2427 | (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm") |
5802c0cb | 2428 | (parallel [(const_int 0)])) |
2429 | (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) | |
9409fce7 | 2430 | (plusminus:SF |
5802c0cb | 2431 | (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) |
2432 | (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] | |
2433 | "TARGET_SSE3" | |
27e5502d | 2434 | "@ |
2435 | h<plusminus_mnemonic>ps\t{%2, %0|%0, %2} | |
2436 | vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}" | |
2437 | [(set_attr "isa" "noavx,avx") | |
2438 | (set_attr "type" "sseadd") | |
fbfe006e | 2439 | (set_attr "atom_unit" "complex") |
27e5502d | 2440 | (set_attr "prefix" "orig,vex") |
2441 | (set_attr "prefix_rep" "1,*") | |
5802c0cb | 2442 | (set_attr "mode" "V4SF")]) |
2443 | ||
;; Sum-reduce V8DF operand 1 into scalar DF operand 0 (AVX512F only).
;; ix86_expand_reduc is called with gen_addv8df3 and a fresh V8DF pseudo as
;; destination; element 0 of that pseudo is then extracted into operand 0.
b8c0e65c | 2444 | (define_expand "reduc_plus_scal_v8df" |
2445 | [(match_operand:DF 0 "register_operand") | |
8e9989b0 | 2446 | (match_operand:V8DF 1 "register_operand")] |
2447 | "TARGET_AVX512F" | |
2448 | { | |
b8c0e65c | 2449 | rtx tmp = gen_reg_rtx (V8DFmode); |
2450 | ix86_expand_reduc (gen_addv8df3, tmp, operands[1]); | |
2451 | emit_insn (gen_vec_extractv8df (operands[0], tmp, const0_rtx)); | |
8e9989b0 | 2452 | DONE; |
2453 | }) | |
2454 | ||
;; Sum-reduce V4DF operand 1 into scalar DF operand 0 (AVX).
;; Sequence: horizontal add of operand 1 with itself, vperm2f128 of that
;; result with itself using immediate 1 (presumably to bring the other
;; 128-bit half into the low position -- confirm against the vperm2f128
;; pattern), a full-width add of the two, and extraction of element 0.
b8c0e65c | 2455 | (define_expand "reduc_plus_scal_v4df" |
2456 | [(match_operand:DF 0 "register_operand") | |
abd4f58b | 2457 | (match_operand:V4DF 1 "register_operand")] |
ed30e0a6 | 2458 | "TARGET_AVX" |
27e5502d | 2459 | { |
2460 | rtx tmp = gen_reg_rtx (V4DFmode); | |
2461 | rtx tmp2 = gen_reg_rtx (V4DFmode); | |
b8c0e65c | 2462 | rtx vec_res = gen_reg_rtx (V4DFmode); |
27e5502d | 2463 | emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1])); |
2464 | emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1))); | |
b8c0e65c | 2465 | emit_insn (gen_addv4df3 (vec_res, tmp, tmp2)); |
2466 | emit_insn (gen_vec_extractv4df (operands[0], vec_res, const0_rtx)); | |
27e5502d | 2467 | DONE; |
2468 | }) | |
ed30e0a6 | 2469 | |
;; Sum-reduce V2DF operand 1 into scalar DF operand 0 (SSE3).
;; A horizontal add of operand 1 with itself is emitted into a fresh V2DF
;; pseudo, and element 0 of that pseudo is extracted into operand 0.
b8c0e65c | 2470 | (define_expand "reduc_plus_scal_v2df" |
2471 | [(match_operand:DF 0 "register_operand") | |
abd4f58b | 2472 | (match_operand:V2DF 1 "register_operand")] |
2a466fea | 2473 | "TARGET_SSE3" |
27e5502d | 2474 | { |
b8c0e65c | 2475 | rtx tmp = gen_reg_rtx (V2DFmode); |
2476 | emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1])); | |
2477 | emit_insn (gen_vec_extractv2df (operands[0], tmp, const0_rtx)); | |
27e5502d | 2478 | DONE; |
2479 | }) | |
2a466fea | 2480 | |
;; Sum-reduce V16SF operand 1 into scalar SF operand 0 (AVX512F only).
;; ix86_expand_reduc is called with gen_addv16sf3 and a fresh V16SF pseudo
;; as destination; element 0 of that pseudo is then extracted into operand 0.
b8c0e65c | 2481 | (define_expand "reduc_plus_scal_v16sf" |
2482 | [(match_operand:SF 0 "register_operand") | |
8e9989b0 | 2483 | (match_operand:V16SF 1 "register_operand")] |
2484 | "TARGET_AVX512F" | |
2485 | { | |
b8c0e65c | 2486 | rtx tmp = gen_reg_rtx (V16SFmode); |
2487 | ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]); | |
2488 | emit_insn (gen_vec_extractv16sf (operands[0], tmp, const0_rtx)); | |
8e9989b0 | 2489 | DONE; |
2490 | }) | |
2491 | ||
;; Sum-reduce V8SF operand 1 into scalar SF operand 0 (AVX).
;; Sequence: two rounds of horizontal add (operand 1 with itself, then the
;; result with itself), vperm2f128 of the second result with itself using
;; immediate 1 (presumably to bring the other 128-bit half into the low
;; position -- confirm against the vperm2f128 pattern), a full-width add,
;; and extraction of element 0.  Note that TMP is reused as the destination
;; of the vperm2f128 once its first value is no longer needed.
b8c0e65c | 2492 | (define_expand "reduc_plus_scal_v8sf" |
2493 | [(match_operand:SF 0 "register_operand") | |
abd4f58b | 2494 | (match_operand:V8SF 1 "register_operand")] |
8cedf886 | 2495 | "TARGET_AVX" |
2496 | { | |
2497 | rtx tmp = gen_reg_rtx (V8SFmode); | |
2498 | rtx tmp2 = gen_reg_rtx (V8SFmode); | |
b8c0e65c | 2499 | rtx vec_res = gen_reg_rtx (V8SFmode); |
8cedf886 | 2500 | emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1])); |
19fbdfaf | 2501 | emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp)); |
2502 | emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1))); | |
b8c0e65c | 2503 | emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2)); |
2504 | emit_insn (gen_vec_extractv8sf (operands[0], vec_res, const0_rtx)); | |
8cedf886 | 2505 | DONE; |
2506 | }) | |
2507 | ||
;; Sum-reduce V4SF operand 1 into scalar SF operand 0 (SSE).
;; With SSE3 available, two rounds of horizontal add are emitted (operand 1
;; with itself, then the result with itself); otherwise ix86_expand_reduc is
;; called with gen_addv4sf3.  In both cases element 0 of the result vector
;; is extracted into operand 0.
b8c0e65c | 2508 | (define_expand "reduc_plus_scal_v4sf" |
2509 | [(match_operand:SF 0 "register_operand") | |
abd4f58b | 2510 | (match_operand:V4SF 1 "register_operand")] |
49f312aa | 2511 | "TARGET_SSE" |
2512 | { | |
b8c0e65c | 2513 | rtx vec_res = gen_reg_rtx (V4SFmode); |
49f312aa | 2514 | if (TARGET_SSE3) |
2515 | { | |
2516 | rtx tmp = gen_reg_rtx (V4SFmode); | |
2517 | emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1])); | |
b8c0e65c | 2518 | emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp)); |
49f312aa | 2519 | } |
2520 | else | |
b8c0e65c | 2521 | ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]); |
2522 | emit_insn (gen_vec_extractv4sf (operands[0], vec_res, const0_rtx)); | |
49f312aa | 2523 | DONE; |
2524 | }) | |
2525 | ||
bb7ad312 | 2526 | ;; Modes handled by reduc_sm{in,ax}* patterns. |
;; Each vector mode is paired with the target flag that enables it:
;; 256-bit integer modes need AVX2, V8SF/V4DF need AVX, V4SF needs SSE,
;; V64QI/V32HI need AVX512BW and the remaining 512-bit modes need AVX512F.
2527 | (define_mode_iterator REDUC_SMINMAX_MODE | |
2528 | [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") | |
2529 | (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") | |
2530 | (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") | |
24d7a006 | 2531 | (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW") |
2532 | (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F") | |
d2ff59d6 | 2533 | (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") |
2534 | (V8DF "TARGET_AVX512F")]) | |
bb7ad312 | 2535 | |
;; Signed min/max reduction of vector operand 1 into scalar operand 0, for
;; every mode in REDUC_SMINMAX_MODE.  The insn condition is empty; enabling
;; is governed by the per-mode conditions of the iterator.
;; ix86_expand_reduc is called with the element-wise gen_<code><mode>3 and a
;; fresh vector pseudo; element 0 of that pseudo is extracted into operand 0.
;; The expander always ends with DONE, so the RTL template is not emitted.
b8c0e65c | 2536 | (define_expand "reduc_<code>_scal_<mode>" |
bb7ad312 | 2537 | [(smaxmin:REDUC_SMINMAX_MODE |
b8c0e65c | 2538 | (match_operand:<ssescalarmode> 0 "register_operand") |
abd4f58b | 2539 | (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))] |
bb7ad312 | 2540 | "" |
49f312aa | 2541 | { |
b8c0e65c | 2542 | rtx tmp = gen_reg_rtx (<MODE>mode); |
2543 | ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]); | |
2544 | emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx)); | |
2e954432 | 2545 | DONE; |
2546 | }) | |
2547 | ||
;; Unsigned min/max reduction of vector operand 1 into scalar operand 0 for
;; the modes of the VI_AVX512BW iterator, enabled under AVX512F.
;; ix86_expand_reduc is called with the element-wise gen_<code><mode>3 and a
;; fresh vector pseudo; element 0 of that pseudo is extracted into operand 0.
;; The expander always ends with DONE, so the RTL template is not emitted.
b8c0e65c | 2548 | (define_expand "reduc_<code>_scal_<mode>" |
9c9987c5 | 2549 | [(umaxmin:VI_AVX512BW |
b8c0e65c | 2550 | (match_operand:<ssescalarmode> 0 "register_operand") |
9c9987c5 | 2551 | (match_operand:VI_AVX512BW 1 "register_operand"))] |
d2ff59d6 | 2552 | "TARGET_AVX512F" |
2553 | { | |
b8c0e65c | 2554 | rtx tmp = gen_reg_rtx (<MODE>mode); |
2555 | ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]); | |
2556 | emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx)); | |
d2ff59d6 | 2557 | DONE; |
2558 | }) | |
2559 | ||
;; Unsigned min/max reduction of vector operand 1 into scalar operand 0 for
;; the modes of the VI_256 iterator, enabled under AVX2.
;; ix86_expand_reduc is called with the element-wise gen_<code><mode>3 and a
;; fresh vector pseudo; element 0 of that pseudo is extracted into operand 0.
;; The expander always ends with DONE, so the RTL template is not emitted.
b8c0e65c | 2560 | (define_expand "reduc_<code>_scal_<mode>" |
bb7ad312 | 2561 | [(umaxmin:VI_256 |
b8c0e65c | 2562 | (match_operand:<ssescalarmode> 0 "register_operand") |
abd4f58b | 2563 | (match_operand:VI_256 1 "register_operand"))] |
bb7ad312 | 2564 | "TARGET_AVX2" |
2e954432 | 2565 | { |
b8c0e65c | 2566 | rtx tmp = gen_reg_rtx (<MODE>mode); |
2567 | ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]); | |
2568 | emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx)); | |
49f312aa | 2569 | DONE; |
2570 | }) | |
2571 | ||
;; Unsigned minimum reduction of V8HI operand 1 into HI operand 0, enabled
;; under SSE4.1.  ix86_expand_reduc is called with gen_uminv8hi3 and a fresh
;; V8HI pseudo; element 0 of that pseudo is extracted into operand 0.
;; The expander always ends with DONE, so the RTL template is not emitted.
b8c0e65c | 2572 | (define_expand "reduc_umin_scal_v8hi" |
e529f590 | 2573 | [(umin:V8HI |
b8c0e65c | 2574 | (match_operand:HI 0 "register_operand") |
abd4f58b | 2575 | (match_operand:V8HI 1 "register_operand"))] |
e529f590 | 2576 | "TARGET_SSE4_1" |
2577 | { | |
b8c0e65c | 2578 | rtx tmp = gen_reg_rtx (V8HImode); |
2579 | ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]); | |
2580 | emit_insn (gen_vec_extractv8hi (operands[0], tmp, const0_rtx)); | |
e529f590 | 2581 | DONE; |
2582 | }) | |
2583 | ||
;; Packed vreduce<ssemodesuffix> (AVX512DQ), modelled as UNSPEC_REDUCE of
;; vector operand 1 and an 8-bit immediate (operand 2, range 0..255).
;; The <mask_name>/<mask_operand3> substitutions presumably add the masked
;; variant of the pattern; they are defined elsewhere in the port.
6164575a | 2584 | (define_insn "<mask_codefor>reducep<mode><mask_name>" |
2585 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
2586 | (unspec:VF_AVX512VL | |
2587 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm") | |
2588 | (match_operand:SI 2 "const_0_to_255_operand")] | |
2589 | UNSPEC_REDUCE))] | |
2590 | "TARGET_AVX512DQ" | |
2591 | "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
2592 | [(set_attr "type" "sse") | |
2593 | (set_attr "prefix" "evex") | |
2594 | (set_attr "mode" "<MODE>")]) | |
2595 | ||
;; Scalar vreduce<ssescalarmodesuffix> (AVX512DQ), modelled as
;; UNSPEC_REDUCE of operands 1 and 2 with an 8-bit immediate (operand 3,
;; range 0..255).  The vec_merge with mask 1 takes element 0 from the
;; unspec result and the remaining elements from operand 1.
;; Operand 2 is a scalar-sized source, so the Intel-syntax template prints
;; it with %<iptr>, as the other scalar patterns in this file do
;; (avx_vmcmp<mode>3, <sse>_vmmaskcmp<mode>3, <sse>_comi); this gives a
;; memory operand the scalar pointer size rather than the full vector size.
2596 | (define_insn "reduces<mode>" | |
2597 | [(set (match_operand:VF_128 0 "register_operand" "=v") | |
2598 | (vec_merge:VF_128 | |
2599 | (unspec:VF_128 | |
2600 | [(match_operand:VF_128 1 "register_operand" "v") | |
2601 | (match_operand:VF_128 2 "nonimmediate_operand" "vm") | |
2602 | (match_operand:SI 3 "const_0_to_255_operand")] | |
2603 | UNSPEC_REDUCE) | |
2604 | (match_dup 1) | |
2605 | (const_int 1)))] | |
2606 | "TARGET_AVX512DQ" | |
2607 | "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}" | |
2608 | [(set_attr "type" "sse") | |
2609 | (set_attr "prefix" "evex") | |
2610 | (set_attr "mode" "<MODE>")]) | |
2611 | ||
5802c0cb | 2612 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2613 | ;; | |
2a466fea | 2614 | ;; Parallel floating point comparisons |
5802c0cb | 2615 | ;; |
2616 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
2617 | ||
;; AVX packed floating-point compare (vcmp<ssemodesuffix>) with an explicit
;; comparison immediate (operand 3, accepted by const_0_to_31_operand),
;; modelled as UNSPEC_PCMP.  The result has the same vector mode as the
;; inputs.
27e5502d | 2618 | (define_insn "avx_cmp<mode>3" |
6a3f5f59 | 2619 | [(set (match_operand:VF_128_256 0 "register_operand" "=x") |
2620 | (unspec:VF_128_256 | |
2621 | [(match_operand:VF_128_256 1 "register_operand" "x") | |
2622 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm") | |
ed30e0a6 | 2623 | (match_operand:SI 3 "const_0_to_31_operand" "n")] |
2624 | UNSPEC_PCMP))] | |
2625 | "TARGET_AVX" | |
0061967e | 2626 | "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
ed30e0a6 | 2627 | [(set_attr "type" "ssecmp") |
00a0e418 | 2628 | (set_attr "length_immediate" "1") |
ed30e0a6 | 2629 | (set_attr "prefix" "vex") |
2630 | (set_attr "mode" "<MODE>")]) | |
2631 | ||
;; AVX scalar floating-point compare (vcmp<ssescalarmodesuffix>) with an
;; explicit comparison immediate (operand 3, accepted by
;; const_0_to_31_operand), modelled as UNSPEC_PCMP.  The vec_merge with mask
;; 1 takes element 0 from the comparison and the remaining elements from
;; operand 1.  In Intel syntax operand 2 is printed with %<iptr>.
27e5502d | 2632 | (define_insn "avx_vmcmp<mode>3" |
2633 | [(set (match_operand:VF_128 0 "register_operand" "=x") | |
2634 | (vec_merge:VF_128 | |
2635 | (unspec:VF_128 | |
2636 | [(match_operand:VF_128 1 "register_operand" "x") | |
2637 | (match_operand:VF_128 2 "nonimmediate_operand" "xm") | |
ed30e0a6 | 2638 | (match_operand:SI 3 "const_0_to_31_operand" "n")] |
2639 | UNSPEC_PCMP) | |
2640 | (match_dup 1) | |
2641 | (const_int 1)))] | |
2642 | "TARGET_AVX" | |
c358a059 | 2643 | "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}" |
ed30e0a6 | 2644 | [(set_attr "type" "ssecmp") |
00a0e418 | 2645 | (set_attr "length_immediate" "1") |
ed30e0a6 | 2646 | (set_attr "prefix" "vex") |
2647 | (set_attr "mode" "<ssescalarmode>")]) | |
2648 | ||
;; Packed floating-point compare for commutative comparison codes.  The
;; insn condition restricts operator 3 to codes of class RTX_COMM_COMPARE,
;; and the "%" in operand 1's constraint marks operands 1 and 2 as
;; commutative.  The comparison is printed through %D3.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) uses the three-operand VEX form.
dd1f4650 | 2649 | (define_insn "*<sse>_maskcmp<mode>3_comm" |
6a3f5f59 | 2650 | [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") |
2651 | (match_operator:VF_128_256 3 "sse_comparison_operator" | |
2652 | [(match_operand:VF_128_256 1 "register_operand" "%0,x") | |
2653 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))] | |
dd1f4650 | 2654 | "TARGET_SSE |
2655 | && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE" | |
2656 | "@ | |
2657 | cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2} | |
2658 | vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
2659 | [(set_attr "isa" "noavx,avx") | |
2660 | (set_attr "type" "ssecmp") | |
2661 | (set_attr "length_immediate" "1") | |
2662 | (set_attr "prefix" "orig,vex") | |
2663 | (set_attr "mode" "<MODE>")]) | |
2664 | ||
;; Packed floating-point compare for any "sse_comparison_operator"; the
;; result is a vector of the same mode as the inputs.  The comparison is
;; printed through %D3.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) uses the three-operand VEX form.
ed30e0a6 | 2665 | (define_insn "<sse>_maskcmp<mode>3" |
6a3f5f59 | 2666 | [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") |
2667 | (match_operator:VF_128_256 3 "sse_comparison_operator" | |
2668 | [(match_operand:VF_128_256 1 "register_operand" "0,x") | |
2669 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))] | |
6fe5844b | 2670 | "TARGET_SSE" |
27e5502d | 2671 | "@ |
2672 | cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2} | |
2673 | vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
2674 | [(set_attr "isa" "noavx,avx") | |
2675 | (set_attr "type" "ssecmp") | |
00a0e418 | 2676 | (set_attr "length_immediate" "1") |
27e5502d | 2677 | (set_attr "prefix" "orig,vex") |
2a466fea | 2678 | (set_attr "mode" "<MODE>")]) |
e7fdb903 | 2679 | |
;; Scalar floating-point compare for any "sse_comparison_operator".  The
;; vec_merge with mask 1 takes element 0 from the comparison and the
;; remaining elements from operand 1.  In Intel syntax operand 2 is printed
;; with %<iptr>.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) uses the three-operand VEX form.
2a466fea | 2680 | (define_insn "<sse>_vmmaskcmp<mode>3" |
27e5502d | 2681 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
2682 | (vec_merge:VF_128 | |
2683 | (match_operator:VF_128 3 "sse_comparison_operator" | |
2684 | [(match_operand:VF_128 1 "register_operand" "0,x") | |
2685 | (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")]) | |
5802c0cb | 2686 | (match_dup 1) |
2687 | (const_int 1)))] | |
6fe5844b | 2688 | "TARGET_SSE" |
27e5502d | 2689 | "@ |
c358a059 | 2690 | cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} |
2691 | vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}" | |
27e5502d | 2692 | [(set_attr "isa" "noavx,avx") |
2693 | (set_attr "type" "ssecmp") | |
2694 | (set_attr "length_immediate" "1,*") | |
2695 | (set_attr "prefix" "orig,vex") | |
2a466fea | 2696 | (set_attr "mode" "<ssescalarmode>")]) |
5802c0cb | 2697 | |
;; Predicate used for the comparison immediate of the AVX-512 compare
;; patterns, selected by vector mode: floating-point modes use
;; const_0_to_31_operand, integer modes use const_0_to_7_operand.
8e6b975f | 2698 | (define_mode_attr cmp_imm_predicate |
f50aa6e9 | 2699 | [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand") |
2700 | (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand") | |
2701 | (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand") | |
2702 | (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand") | |
2703 | (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand") | |
2704 | (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand") | |
2705 | (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand") | |
2706 | (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand") | |
2707 | (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")]) | |
2708 | ||
;; AVX-512 compare into a mask register for the V48_AVX512VL modes,
;; modelled as UNSPEC_PCMP with an explicit comparison immediate (operand 3)
;; whose predicate is chosen per mode by <cmp_imm_predicate>.
;; The <mask_scalar_merge_name> and <round_saeonly_name> substitutions are
;; defined elsewhere; judging by the template they add the mask-merge
;; operand and the SAE operand respectively -- confirm against subst.md.
2709 | (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>" | |
a31e7f46 | 2710 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
8e6b975f | 2711 | (unspec:<avx512fmaskmode> |
f50aa6e9 | 2712 | [(match_operand:V48_AVX512VL 1 "register_operand" "v") |
2713 | (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>") | |
8e6b975f | 2714 | (match_operand:SI 3 "<cmp_imm_predicate>" "n")] |
2715 | UNSPEC_PCMP))] | |
dbfe84d5 | 2716 | "TARGET_AVX512F && <round_saeonly_mode512bit_condition>" |
2717 | "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}" | |
8e6b975f | 2718 | [(set_attr "type" "ssecmp") |
2719 | (set_attr "length_immediate" "1") | |
2720 | (set_attr "prefix" "evex") | |
2721 | (set_attr "mode" "<sseinsnmode>")]) | |
2722 | ||
;; AVX-512BW integer compare into a mask register (vpcmp<ssemodesuffix>)
;; for the VI12_AVX512VL modes, modelled as UNSPEC_PCMP with an explicit
;; comparison immediate (operand 3) whose predicate is chosen per mode by
;; <cmp_imm_predicate>.
f50aa6e9 | 2723 | (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" |
2724 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
2725 | (unspec:<avx512fmaskmode> | |
2726 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
2727 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") | |
2728 | (match_operand:SI 3 "<cmp_imm_predicate>" "n")] | |
2729 | UNSPEC_PCMP))] | |
2730 | "TARGET_AVX512BW" | |
2731 | "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}" | |
2732 | [(set_attr "type" "ssecmp") | |
2733 | (set_attr "length_immediate" "1") | |
2734 | (set_attr "prefix" "evex") | |
2735 | (set_attr "mode" "<sseinsnmode>")]) | |
2736 | ||
;; AVX-512BW unsigned integer compare into a mask register
;; (vpcmpu<ssemodesuffix>) for the VI12_AVX512VL modes, modelled as
;; UNSPEC_UNSIGNED_PCMP with a comparison immediate (operand 3) accepted by
;; const_0_to_7_operand.
6b76cef2 | 2737 | (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>" |
a31e7f46 | 2738 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
d2ff59d6 | 2739 | (unspec:<avx512fmaskmode> |
6b76cef2 | 2740 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") |
2741 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") | |
2742 | (match_operand:SI 3 "const_0_to_7_operand" "n")] | |
2743 | UNSPEC_UNSIGNED_PCMP))] | |
2744 | "TARGET_AVX512BW" | |
2745 | "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}" | |
2746 | [(set_attr "type" "ssecmp") | |
2747 | (set_attr "length_immediate" "1") | |
2748 | (set_attr "prefix" "evex") | |
2749 | (set_attr "mode" "<sseinsnmode>")]) | |
2750 | ||
;; AVX-512F unsigned integer compare into a mask register
;; (vpcmpu<ssemodesuffix>) for the VI48_AVX512VL modes, modelled as
;; UNSPEC_UNSIGNED_PCMP with a comparison immediate (operand 3) accepted by
;; const_0_to_7_operand.
2751 | (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>" | |
2752 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
2753 | (unspec:<avx512fmaskmode> | |
2754 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v") | |
2755 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm") | |
d2ff59d6 | 2756 | (match_operand:SI 3 "const_0_to_7_operand" "n")] |
2757 | UNSPEC_UNSIGNED_PCMP))] | |
2758 | "TARGET_AVX512F" | |
c3d9b089 | 2759 | "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}" |
d2ff59d6 | 2760 | [(set_attr "type" "ssecmp") |
2761 | (set_attr "length_immediate" "1") | |
2762 | (set_attr "prefix" "evex") | |
2763 | (set_attr "mode" "<sseinsnmode>")]) | |
2764 | ||
;; AVX-512F scalar floating-point compare into a mask register
;; (vcmp<ssescalarmodesuffix>), modelled as UNSPEC_PCMP with a comparison
;; immediate (operand 3, accepted by const_0_to_31_operand).  The result is
;; ANDed with 1, so only bit 0 of the mask can be set.
;; Operand 2 is a scalar-sized source, so the Intel-syntax template prints
;; it with %<iptr>, as the other scalar compare patterns in this file do
;; (avx_vmcmp<mode>3, <sse>_vmmaskcmp<mode>3, <sse>_comi); this gives a
;; memory operand the scalar pointer size rather than the full vector size.
dbfe84d5 | 2765 | (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>" |
a31e7f46 | 2766 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
2344eae2 | 2767 | (and:<avx512fmaskmode> |
2768 | (unspec:<avx512fmaskmode> | |
2769 | [(match_operand:VF_128 1 "register_operand" "v") | |
dbfe84d5 | 2770 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
2344eae2 | 2771 | (match_operand:SI 3 "const_0_to_31_operand" "n")] |
2772 | UNSPEC_PCMP) | |
2773 | (const_int 1)))] | |
2774 | "TARGET_AVX512F" | |
dbfe84d5 | 2775 | "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}" |
2344eae2 | 2776 | [(set_attr "type" "ssecmp") |
2777 | (set_attr "length_immediate" "1") | |
2778 | (set_attr "prefix" "evex") | |
2779 | (set_attr "mode" "<ssescalarmode>")]) | |
2780 | ||
;; Masked form of the AVX-512F scalar floating-point compare into a mask
;; register: the UNSPEC_PCMP result is ANDed with (operand 4 AND 1), where
;; operand 4 is the write mask printed as {%4} after the destination.
;; Operand 3 is the comparison immediate (const_0_to_31_operand).
;; Operand 2 is a scalar-sized source, so the Intel-syntax template prints
;; it with %<iptr>, as the other scalar compare patterns in this file do
;; (avx_vmcmp<mode>3, <sse>_vmmaskcmp<mode>3, <sse>_comi); this gives a
;; memory operand the scalar pointer size rather than the full vector size.
dbfe84d5 | 2781 | (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>" |
a31e7f46 | 2782 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
5220cab6 | 2783 | (and:<avx512fmaskmode> |
2784 | (unspec:<avx512fmaskmode> | |
2785 | [(match_operand:VF_128 1 "register_operand" "v") | |
dbfe84d5 | 2786 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
5220cab6 | 2787 | (match_operand:SI 3 "const_0_to_31_operand" "n")] |
2788 | UNSPEC_PCMP) | |
2789 | (and:<avx512fmaskmode> | |
a31e7f46 | 2790 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk") |
5220cab6 | 2791 | (const_int 1))))] |
2792 | "TARGET_AVX512F" | |
dbfe84d5 | 2793 | "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}" |
5220cab6 | 2794 | [(set_attr "type" "ssecmp") |
2795 | (set_attr "length_immediate" "1") | |
2796 | (set_attr "prefix" "evex") | |
2797 | (set_attr "mode" "<ssescalarmode>")]) | |
2798 | ||
;; Packed floating-point compare for any "sse_comparison_operator" whose
;; result is written to a mask register (constraint Yk) using the
;; EVEX-encoded vcmp form; the comparison is printed through %D3.
;; The insn is therefore only valid when AVX-512F is enabled; the condition
;; used to be TARGET_SSE, which let the pattern be recognized on targets
;; that have no mask registers at all.
;; NOTE(review): the VF iterator also covers 128/256-bit modes, which for a
;; mask destination presumably need AVX512VL as well -- consider switching
;; to VF_AVX512VL after checking the callers.
2344eae2 | 2799 | (define_insn "avx512f_maskcmp<mode>3" |
a31e7f46 | 2800 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
2344eae2 | 2801 | (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator" |
2802 | [(match_operand:VF 1 "register_operand" "v") | |
2803 | (match_operand:VF 2 "nonimmediate_operand" "vm")]))] | |
2804 | "TARGET_AVX512F" | |
2805 | "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
2806 | [(set_attr "type" "ssecmp") | |
2807 | (set_attr "length_immediate" "1") | |
2808 | (set_attr "prefix" "evex") | |
2809 | (set_attr "mode" "<sseinsnmode>")]) | |
2810 | ||
;; comi / vcomi: compare element 0 of operand 0 with element 0 of operand 1
;; and set FLAGS_REG in CCFP mode.  Enabled when SSE_FLOAT_MODE_P holds for
;; the scalar mode.  In Intel syntax operand 1 is printed with %<iptr>.
;; prefix_data16 is 1 for DF mode and 0 otherwise.
dbfe84d5 | 2811 | (define_insn "<sse>_comi<round_saeonly_name>" |
5802c0cb | 2812 | [(set (reg:CCFP FLAGS_REG) |
2813 | (compare:CCFP | |
2a466fea | 2814 | (vec_select:MODEF |
4c1099de | 2815 | (match_operand:<ssevecmode> 0 "register_operand" "v") |
5802c0cb | 2816 | (parallel [(const_int 0)])) |
2a466fea | 2817 | (vec_select:MODEF |
dbfe84d5 | 2818 | (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
5802c0cb | 2819 | (parallel [(const_int 0)]))))] |
2a466fea | 2820 | "SSE_FLOAT_MODE_P (<MODE>mode)" |
dbfe84d5 | 2821 | "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}" |
5802c0cb | 2822 | [(set_attr "type" "ssecomi") |
ed30e0a6 | 2823 | (set_attr "prefix" "maybe_vex") |
00a0e418 | 2824 | (set_attr "prefix_rep" "0") |
2825 | (set (attr "prefix_data16") | |
2826 | (if_then_else (eq_attr "mode" "DF") | |
2827 | (const_string "1") | |
2828 | (const_string "0"))) | |
2a466fea | 2829 | (set_attr "mode" "<MODE>")]) |
5802c0cb | 2830 | |
;; ucomi / vucomi: compare element 0 of operand 0 with element 0 of operand
;; 1 and set FLAGS_REG in CCFPU mode.  Enabled when SSE_FLOAT_MODE_P holds
;; for the scalar mode.  In Intel syntax operand 1 is printed with %<iptr>.
;; prefix_data16 is 1 for DF mode and 0 otherwise.
dbfe84d5 | 2831 | (define_insn "<sse>_ucomi<round_saeonly_name>" |
5802c0cb | 2832 | [(set (reg:CCFPU FLAGS_REG) |
2833 | (compare:CCFPU | |
2a466fea | 2834 | (vec_select:MODEF |
4c1099de | 2835 | (match_operand:<ssevecmode> 0 "register_operand" "v") |
5802c0cb | 2836 | (parallel [(const_int 0)])) |
2a466fea | 2837 | (vec_select:MODEF |
dbfe84d5 | 2838 | (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
5802c0cb | 2839 | (parallel [(const_int 0)]))))] |
2a466fea | 2840 | "SSE_FLOAT_MODE_P (<MODE>mode)" |
dbfe84d5 | 2841 | "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}" |
5802c0cb | 2842 | [(set_attr "type" "ssecomi") |
ed30e0a6 | 2843 | (set_attr "prefix" "maybe_vex") |
00a0e418 | 2844 | (set_attr "prefix_rep" "0") |
2845 | (set (attr "prefix_data16") | |
2846 | (if_then_else (eq_attr "mode" "DF") | |
2847 | (const_string "1") | |
2848 | (const_string "0"))) | |
2a466fea | 2849 | (set_attr "mode" "<MODE>")]) |
5802c0cb | 2850 | |
;; vec_cmp expander for the V48_AVX512VL modes with a mask-mode result.
;; Operator 1 compares operands 2 and 3.  All work is delegated to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
dab48979 | 2851 | (define_expand "vec_cmp<mode><avx512fmaskmodelower>" |
2852 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand") | |
2853 | (match_operator:<avx512fmaskmode> 1 "" | |
2854 | [(match_operand:V48_AVX512VL 2 "register_operand") | |
2855 | (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))] | |
2856 | "TARGET_AVX512F" | |
2857 | { | |
2858 | bool ok = ix86_expand_mask_vec_cmp (operands); | |
2859 | gcc_assert (ok); | |
2860 | DONE; | |
2861 | }) | |
2862 | ||
;; vec_cmp expander for the VI12_AVX512VL modes with a mask-mode result;
;; requires TARGET_AVX512BW.  Delegates to ix86_expand_mask_vec_cmp, which
;; is asserted to succeed.
2863 | (define_expand "vec_cmp<mode><avx512fmaskmodelower>" | |
2864 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand") | |
2865 | (match_operator:<avx512fmaskmode> 1 "" | |
2866 | [(match_operand:VI12_AVX512VL 2 "register_operand") | |
2867 | (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))] | |
2868 | "TARGET_AVX512BW" | |
2869 | { | |
2870 | bool ok = ix86_expand_mask_vec_cmp (operands); | |
2871 | gcc_assert (ok); | |
2872 | DONE; | |
2873 | }) | |
2874 | ||
;; vec_cmp expander for the VI_256 modes; the result has the integer vector
;; mode <sseintvecmode>.  Requires TARGET_AVX2 and delegates to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2875 | (define_expand "vec_cmp<mode><sseintvecmodelower>" | |
2876 | [(set (match_operand:<sseintvecmode> 0 "register_operand") | |
2877 | (match_operator:<sseintvecmode> 1 "" | |
2878 | [(match_operand:VI_256 2 "register_operand") | |
2879 | (match_operand:VI_256 3 "nonimmediate_operand")]))] | |
2880 | "TARGET_AVX2" | |
2881 | { | |
2882 | bool ok = ix86_expand_int_vec_cmp (operands); | |
2883 | gcc_assert (ok); | |
2884 | DONE; | |
2885 | }) | |
2886 | ||
;; vec_cmp expander for the VI124_128 modes; the result has the integer
;; vector mode <sseintvecmode>.  Requires TARGET_SSE2 and delegates to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2887 | (define_expand "vec_cmp<mode><sseintvecmodelower>" | |
2888 | [(set (match_operand:<sseintvecmode> 0 "register_operand") | |
2889 | (match_operator:<sseintvecmode> 1 "" | |
2890 | [(match_operand:VI124_128 2 "register_operand") | |
2891 | (match_operand:VI124_128 3 "nonimmediate_operand")]))] | |
2892 | "TARGET_SSE2" | |
2893 | { | |
2894 | bool ok = ix86_expand_int_vec_cmp (operands); | |
2895 | gcc_assert (ok); | |
2896 | DONE; | |
2897 | }) | |
2898 | ||
;; vec_cmp expander for V2DI operands with a V2DI result.  Requires
;; TARGET_SSE4_2 and delegates to ix86_expand_int_vec_cmp, which is
;; asserted to succeed.
2899 | (define_expand "vec_cmpv2div2di" | |
2900 | [(set (match_operand:V2DI 0 "register_operand") | |
2901 | (match_operator:V2DI 1 "" | |
2902 | [(match_operand:V2DI 2 "register_operand") | |
2903 | (match_operand:V2DI 3 "nonimmediate_operand")]))] | |
2904 | "TARGET_SSE4_2" | |
2905 | { | |
2906 | bool ok = ix86_expand_int_vec_cmp (operands); | |
2907 | gcc_assert (ok); | |
2908 | DONE; | |
2909 | }) | |
2910 | ||
;; vec_cmp expander for the VF_256 floating-point modes; the result has the
;; integer vector mode <sseintvecmode>.  Requires TARGET_AVX and delegates
;; to ix86_expand_fp_vec_cmp, which is asserted to succeed.
2911 | (define_expand "vec_cmp<mode><sseintvecmodelower>" | |
2912 | [(set (match_operand:<sseintvecmode> 0 "register_operand") | |
2913 | (match_operator:<sseintvecmode> 1 "" | |
2914 | [(match_operand:VF_256 2 "register_operand") | |
2915 | (match_operand:VF_256 3 "nonimmediate_operand")]))] | |
2916 | "TARGET_AVX" | |
2917 | { | |
2918 | bool ok = ix86_expand_fp_vec_cmp (operands); | |
2919 | gcc_assert (ok); | |
2920 | DONE; | |
2921 | }) | |
2922 | ||
;; vec_cmp expander for the VF_128 floating-point modes; the result has the
;; integer vector mode <sseintvecmode>.  Requires TARGET_SSE and delegates
;; to ix86_expand_fp_vec_cmp, which is asserted to succeed.
2923 | (define_expand "vec_cmp<mode><sseintvecmodelower>" | |
2924 | [(set (match_operand:<sseintvecmode> 0 "register_operand") | |
2925 | (match_operator:<sseintvecmode> 1 "" | |
2926 | [(match_operand:VF_128 2 "register_operand") | |
2927 | (match_operand:VF_128 3 "nonimmediate_operand")]))] | |
2928 | "TARGET_SSE" | |
2929 | { | |
2930 | bool ok = ix86_expand_fp_vec_cmp (operands); | |
2931 | gcc_assert (ok); | |
2932 | DONE; | |
2933 | }) | |
2934 | ||
;; vec_cmpu expander for the VI48_AVX512VL integer modes with a mask-mode
;; result.  Requires TARGET_AVX512F and delegates to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
2935 | (define_expand "vec_cmpu<mode><avx512fmaskmodelower>" | |
2936 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand") | |
2937 | (match_operator:<avx512fmaskmode> 1 "" | |
2938 | [(match_operand:VI48_AVX512VL 2 "register_operand") | |
2939 | (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))] | |
2940 | "TARGET_AVX512F" | |
2941 | { | |
2942 | bool ok = ix86_expand_mask_vec_cmp (operands); | |
2943 | gcc_assert (ok); | |
2944 | DONE; | |
2945 | }) | |
2946 | ||
;; vec_cmpu expander for the VI12_AVX512VL integer modes with a mask-mode
;; result.  Requires TARGET_AVX512BW and delegates to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
2947 | (define_expand "vec_cmpu<mode><avx512fmaskmodelower>" | |
2948 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand") | |
2949 | (match_operator:<avx512fmaskmode> 1 "" | |
2950 | [(match_operand:VI12_AVX512VL 2 "register_operand") | |
2951 | (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))] | |
2952 | "TARGET_AVX512BW" | |
2953 | { | |
2954 | bool ok = ix86_expand_mask_vec_cmp (operands); | |
2955 | gcc_assert (ok); | |
2956 | DONE; | |
2957 | }) | |
2958 | ||
;; vec_cmpu expander for the VI_256 modes; the result has mode
;; <sseintvecmode>.  Requires TARGET_AVX2 and delegates to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2959 | (define_expand "vec_cmpu<mode><sseintvecmodelower>" | |
2960 | [(set (match_operand:<sseintvecmode> 0 "register_operand") | |
2961 | (match_operator:<sseintvecmode> 1 "" | |
2962 | [(match_operand:VI_256 2 "register_operand") | |
2963 | (match_operand:VI_256 3 "nonimmediate_operand")]))] | |
2964 | "TARGET_AVX2" | |
2965 | { | |
2966 | bool ok = ix86_expand_int_vec_cmp (operands); | |
2967 | gcc_assert (ok); | |
2968 | DONE; | |
2969 | }) | |
2970 | ||
;; vec_cmpu expander for the VI124_128 modes; the result has mode
;; <sseintvecmode>.  Requires TARGET_SSE2 and delegates to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2971 | (define_expand "vec_cmpu<mode><sseintvecmodelower>" | |
2972 | [(set (match_operand:<sseintvecmode> 0 "register_operand") | |
2973 | (match_operator:<sseintvecmode> 1 "" | |
2974 | [(match_operand:VI124_128 2 "register_operand") | |
2975 | (match_operand:VI124_128 3 "nonimmediate_operand")]))] | |
2976 | "TARGET_SSE2" | |
2977 | { | |
2978 | bool ok = ix86_expand_int_vec_cmp (operands); | |
2979 | gcc_assert (ok); | |
2980 | DONE; | |
2981 | }) | |
2982 | ||
;; vec_cmpu expander for V2DI operands with a V2DI result.  Requires
;; TARGET_SSE4_2 and delegates to ix86_expand_int_vec_cmp, which is
;; asserted to succeed.
2983 | (define_expand "vec_cmpuv2div2di" | |
2984 | [(set (match_operand:V2DI 0 "register_operand") | |
2985 | (match_operator:V2DI 1 "" | |
2986 | [(match_operand:V2DI 2 "register_operand") | |
2987 | (match_operand:V2DI 3 "nonimmediate_operand")]))] | |
2988 | "TARGET_SSE4_2" | |
2989 | { | |
2990 | bool ok = ix86_expand_int_vec_cmp (operands); | |
2991 | gcc_assert (ok); | |
2992 | DONE; | |
2993 | }) | |
2994 | ||
;; Vector conditional select for 512-bit data modes driven by a 512-bit
;; floating-point comparison: operator 3 compares operands 4 and 5 and the
;; result selects between operands 1 and 2.  Only enabled when the data
;; mode and the comparison mode have the same number of elements.
;; Delegates to ix86_expand_fp_vcond, which is asserted to succeed.
f23a3158 | 2995 | (define_expand "vcond<V_512:mode><VF_512:mode>" |
2996 | [(set (match_operand:V_512 0 "register_operand") | |
2997 | (if_then_else:V_512 | |
2998 | (match_operator 3 "" | |
2999 | [(match_operand:VF_512 4 "nonimmediate_operand") | |
3000 | (match_operand:VF_512 5 "nonimmediate_operand")]) | |
3001 | (match_operand:V_512 1 "general_operand") | |
3002 | (match_operand:V_512 2 "general_operand")))] | |
3003 | "TARGET_AVX512F | |
3004 | && (GET_MODE_NUNITS (<V_512:MODE>mode) | |
3005 | == GET_MODE_NUNITS (<VF_512:MODE>mode))" | |
3006 | { | |
3007 | bool ok = ix86_expand_fp_vcond (operands); | |
3008 | gcc_assert (ok); | |
3009 | DONE; | |
3010 | }) | |
3011 | ||
;; Vector conditional select for 256-bit data modes driven by a 256-bit
;; floating-point comparison (operator 3 on operands 4 and 5, selecting
;; between operands 1 and 2).  Requires TARGET_AVX and equal element counts
;; in the two modes.  Delegates to ix86_expand_fp_vcond.
d6b19f6b | 3012 | (define_expand "vcond<V_256:mode><VF_256:mode>" |
abd4f58b | 3013 | [(set (match_operand:V_256 0 "register_operand") |
d6b19f6b | 3014 | (if_then_else:V_256 |
5deb404d | 3015 | (match_operator 3 "" |
abd4f58b | 3016 | [(match_operand:VF_256 4 "nonimmediate_operand") |
3017 | (match_operand:VF_256 5 "nonimmediate_operand")]) | |
3018 | (match_operand:V_256 1 "general_operand") | |
3019 | (match_operand:V_256 2 "general_operand")))] | |
d6b19f6b | 3020 | "TARGET_AVX |
3021 | && (GET_MODE_NUNITS (<V_256:MODE>mode) | |
3022 | == GET_MODE_NUNITS (<VF_256:MODE>mode))" | |
3023 | { | |
3024 | bool ok = ix86_expand_fp_vcond (operands); | |
3025 | gcc_assert (ok); | |
3026 | DONE; | |
3027 | }) | |
3028 | ||
;; Vector conditional select for 128-bit data modes driven by a 128-bit
;; floating-point comparison (operator 3 on operands 4 and 5, selecting
;; between operands 1 and 2).  Requires TARGET_SSE and equal element counts
;; in the two modes.  Delegates to ix86_expand_fp_vcond.
3029 | (define_expand "vcond<V_128:mode><VF_128:mode>" | |
abd4f58b | 3030 | [(set (match_operand:V_128 0 "register_operand") |
d6b19f6b | 3031 | (if_then_else:V_128 |
3032 | (match_operator 3 "" | |
abd4f58b | 3033 | [(match_operand:VF_128 4 "nonimmediate_operand") |
3034 | (match_operand:VF_128 5 "nonimmediate_operand")]) | |
3035 | (match_operand:V_128 1 "general_operand") | |
3036 | (match_operand:V_128 2 "general_operand")))] | |
d6b19f6b | 3037 | "TARGET_SSE |
3038 | && (GET_MODE_NUNITS (<V_128:MODE>mode) | |
3039 | == GET_MODE_NUNITS (<VF_128:MODE>mode))" | |
76405cce | 3040 | { |
17e313b0 | 3041 | bool ok = ix86_expand_fp_vcond (operands); |
3042 | gcc_assert (ok); | |
3043 | DONE; | |
76405cce | 3044 | }) |
3045 | ||
;; vcond_mask for the V48_AVX512VL modes with a mask-mode selector
;; (operand 3).  There is no preparation code: the vec_merge of operands 1
;; and 2 under operand 3 is emitted exactly as written in the template.
98da9bbe | 3046 | (define_expand "vcond_mask_<mode><avx512fmaskmodelower>" |
3047 | [(set (match_operand:V48_AVX512VL 0 "register_operand") | |
3048 | (vec_merge:V48_AVX512VL | |
3049 | (match_operand:V48_AVX512VL 1 "nonimmediate_operand") | |
3050 | (match_operand:V48_AVX512VL 2 "vector_move_operand") | |
3051 | (match_operand:<avx512fmaskmode> 3 "register_operand")))] | |
3052 | "TARGET_AVX512F") | |
3053 | ||
;; vcond_mask for the VI12_AVX512VL modes with a mask-mode selector
;; (operand 3); requires TARGET_AVX512BW.  There is no preparation code, so
;; the vec_merge template is emitted as written.
3054 | (define_expand "vcond_mask_<mode><avx512fmaskmodelower>" | |
3055 | [(set (match_operand:VI12_AVX512VL 0 "register_operand") | |
3056 | (vec_merge:VI12_AVX512VL | |
3057 | (match_operand:VI12_AVX512VL 1 "nonimmediate_operand") | |
3058 | (match_operand:VI12_AVX512VL 2 "vector_move_operand") | |
3059 | (match_operand:<avx512fmaskmode> 3 "register_operand")))] | |
3060 | "TARGET_AVX512BW") | |
3061 | ||
;; vcond_mask for the VI_256 modes with a vector selector of mode
;; <sseintvecmode> (operand 3).  Requires TARGET_AVX2.  The template is not
;; emitted; ix86_expand_sse_movcc is called with (dest, selector, op1, op2).
3062 | (define_expand "vcond_mask_<mode><sseintvecmodelower>" | |
3063 | [(set (match_operand:VI_256 0 "register_operand") | |
3064 | (vec_merge:VI_256 | |
3065 | (match_operand:VI_256 1 "nonimmediate_operand") | |
3066 | (match_operand:VI_256 2 "vector_move_operand") | |
3067 | (match_operand:<sseintvecmode> 3 "register_operand")))] | |
3068 | "TARGET_AVX2" | |
3069 | { | |
3070 | ix86_expand_sse_movcc (operands[0], operands[3], | |
3071 | operands[1], operands[2]); | |
3072 | DONE; | |
3073 | }) | |
3074 | ||
;; vcond_mask for the VI124_128 modes with a vector selector of mode
;; <sseintvecmode> (operand 3).  Requires TARGET_SSE2.  The template is not
;; emitted; ix86_expand_sse_movcc is called with (dest, selector, op1, op2).
3075 | (define_expand "vcond_mask_<mode><sseintvecmodelower>" | |
3076 | [(set (match_operand:VI124_128 0 "register_operand") | |
3077 | (vec_merge:VI124_128 | |
3078 | (match_operand:VI124_128 1 "nonimmediate_operand") | |
3079 | (match_operand:VI124_128 2 "vector_move_operand") | |
3080 | (match_operand:<sseintvecmode> 3 "register_operand")))] | |
3081 | "TARGET_SSE2" | |
3082 | { | |
3083 | ix86_expand_sse_movcc (operands[0], operands[3], | |
3084 | operands[1], operands[2]); | |
3085 | DONE; | |
3086 | }) | |
3087 | ||
;; vcond_mask for V2DI data with a V2DI selector (operand 3).  Requires
;; TARGET_SSE4_2.  The template is not emitted; ix86_expand_sse_movcc is
;; called with (dest, selector, op1, op2).
3088 | (define_expand "vcond_mask_v2div2di" | |
3089 | [(set (match_operand:V2DI 0 "register_operand") | |
3090 | (vec_merge:V2DI | |
3091 | (match_operand:V2DI 1 "nonimmediate_operand") | |
3092 | (match_operand:V2DI 2 "vector_move_operand") | |
3093 | (match_operand:V2DI 3 "register_operand")))] | |
3094 | "TARGET_SSE4_2" | |
3095 | { | |
3096 | ix86_expand_sse_movcc (operands[0], operands[3], | |
3097 | operands[1], operands[2]); | |
3098 | DONE; | |
3099 | }) | |
3100 | ||
;; vcond_mask for the VF_256 floating-point modes with an integer-vector
;; selector of mode <sseintvecmode> (operand 3).  Requires TARGET_AVX.
;; The template is not emitted; ix86_expand_sse_movcc is called with
;; (dest, selector, op1, op2).
3101 | (define_expand "vcond_mask_<mode><sseintvecmodelower>" | |
3102 | [(set (match_operand:VF_256 0 "register_operand") | |
3103 | (vec_merge:VF_256 | |
3104 | (match_operand:VF_256 1 "nonimmediate_operand") | |
3105 | (match_operand:VF_256 2 "vector_move_operand") | |
3106 | (match_operand:<sseintvecmode> 3 "register_operand")))] | |
3107 | "TARGET_AVX" | |
3108 | { | |
3109 | ix86_expand_sse_movcc (operands[0], operands[3], | |
3110 | operands[1], operands[2]); | |
3111 | DONE; | |
3112 | }) | |
3113 | ||
;; vcond_mask for the VF_128 floating-point modes with an integer-vector
;; selector of mode <sseintvecmode> (operand 3).  Requires TARGET_SSE.
;; The template is not emitted; ix86_expand_sse_movcc is called with
;; (dest, selector, op1, op2).
3114 | (define_expand "vcond_mask_<mode><sseintvecmodelower>" | |
3115 | [(set (match_operand:VF_128 0 "register_operand") | |
3116 | (vec_merge:VF_128 | |
3117 | (match_operand:VF_128 1 "nonimmediate_operand") | |
3118 | (match_operand:VF_128 2 "vector_move_operand") | |
3119 | (match_operand:<sseintvecmode> 3 "register_operand")))] | |
3120 | "TARGET_SSE" | |
3121 | { | |
3122 | ix86_expand_sse_movcc (operands[0], operands[3], | |
3123 | operands[1], operands[2]); | |
3124 | DONE; | |
3125 | }) | |
3126 | ||
5802c0cb | 3127 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
3128 | ;; | |
2a466fea | 3129 | ;; Parallel floating point logical operations |
5802c0cb | 3130 | ;; |
3131 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
3132 | ||
;; (~op1) & op2 for the 128/256-bit floating-point vector modes.
;; Alternative 0 is the two-operand "andn" form (isa noavx, operand 1 tied
;; to the destination); alternative 1 is the three-operand "vandn" form.
;; The suffix is "ps" when the insn's mode attribute is V8SF or V4SF,
;; otherwise <ssemodesuffix>.  When <mask_applied> is set and
;; TARGET_AVX512DQ is not, the integer form vpandn[qd] is emitted instead.
;; The assembler template is formatted into a function-static buffer that
;; is returned to the caller.
0607f34b | 3133 | (define_insn "<sse>_andnot<mode>3<mask_name>" |
3134 | [(set (match_operand:VF_128_256 0 "register_operand" "=x,v") | |
3135 | (and:VF_128_256 | |
3136 | (not:VF_128_256 | |
3137 | (match_operand:VF_128_256 1 "register_operand" "0,v")) | |
3138 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))] | |
3139 | "TARGET_SSE && <mask_avx512vl_condition>" | |
887423c0 | 3140 | { |
0607f34b | 3141 | static char buf[128]; |
596112aa | 3142 | const char *ops; |
3143 | const char *suffix; | |
3144 | ||
3145 | switch (get_attr_mode (insn)) | |
3146 | { | |
3147 | case MODE_V8SF: | |
3148 | case MODE_V4SF: | |
3149 | suffix = "ps"; | |
3150 | break; | |
3151 | default: | |
3152 | suffix = "<ssemodesuffix>"; | |
3153 | } | |
5802c0cb | 3154 | |
887423c0 | 3155 | switch (which_alternative) |
3156 | { | |
3157 | case 0: | |
596112aa | 3158 | ops = "andn%s\t{%%2, %%0|%%0, %%2}"; |
887423c0 | 3159 | break; |
3160 | case 1: | |
0607f34b | 3161 | ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; |
887423c0 | 3162 | break; |
3163 | default: | |
3164 | gcc_unreachable (); | |
3165 | } | |
ed30e0a6 | 3166 | |
0607f34b | 3167 | /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */ |
3168 | if (<mask_applied> && !TARGET_AVX512DQ) | |
6a3f5f59 | 3169 | { |
0607f34b | 3170 | suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; |
3171 | ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; | |
6a3f5f59 | 3172 | } |
3173 | ||
596112aa | 3174 | snprintf (buf, sizeof (buf), ops, suffix); |
887423c0 | 3175 | return buf; |
6fc76bb0 | 3176 | } |
887423c0 | 3177 | [(set_attr "isa" "noavx,avx") |
3178 | (set_attr "type" "sselog") | |
6a3f5f59 | 3179 | (set_attr "prefix" "orig,maybe_evex") |
596112aa | 3180 | (set (attr "mode") |
7d460314 | 3181 | (cond [(and (match_test "<MODE_SIZE> == 16") |
3182 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
596112aa | 3183 | (const_string "<ssePSmode>") |
3184 | (match_test "TARGET_AVX") | |
3185 | (const_string "<MODE>") | |
3186 | (match_test "optimize_function_for_size_p (cfun)") | |
3187 | (const_string "V4SF") | |
3188 | ] | |
3189 | (const_string "<MODE>")))]) | |
ed30e0a6 | 3190 | |
0607f34b | 3191 | |
;; (~op1) & op2 for the 512-bit floating-point vector modes (VF_512).
;; With TARGET_AVX512DQ the output is vandn<ssemodesuffix>; without it the
;; integer form vpandn[qd] is used, the suffix chosen from the element mode
;; (DFmode -> "q", otherwise "d").  The template is formatted into a
;; function-static buffer that is returned to the caller.
3192 | (define_insn "<sse>_andnot<mode>3<mask_name>" | |
3193 | [(set (match_operand:VF_512 0 "register_operand" "=v") | |
3194 | (and:VF_512 | |
3195 | (not:VF_512 | |
3196 | (match_operand:VF_512 1 "register_operand" "v")) | |
3197 | (match_operand:VF_512 2 "nonimmediate_operand" "vm")))] | |
3198 | "TARGET_AVX512F" | |
3199 | { | |
3200 | static char buf[128]; | |
3201 | const char *ops; | |
3202 | const char *suffix; | |
3203 | ||
3204 | suffix = "<ssemodesuffix>"; | |
3205 | ops = ""; | |
3206 | ||
3207 | /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */ | |
3208 | if (!TARGET_AVX512DQ) | |
3209 | { | |
3210 | suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; | |
3211 | ops = "p"; | |
3212 | } | |
3213 | ||
3214 | snprintf (buf, sizeof (buf), | |
3215 | "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}", | |
3216 | ops, suffix); | |
3217 | return buf; | |
3218 | } | |
3219 | [(set_attr "type" "sselog") | |
3220 | (set_attr "prefix" "evex") | |
3221 | (set_attr "mode" "<sseinsnmode>")]) | |
3222 | ||
;; Expander for the any_logic operations on the 128/256-bit floating-point
;; vector modes.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands before the template
;; is emitted.
3223 | (define_expand "<code><mode>3<mask_name>" | |
6a3f5f59 | 3224 | [(set (match_operand:VF_128_256 0 "register_operand") |
0607f34b | 3225 | (any_logic:VF_128_256 |
3226 | (match_operand:VF_128_256 1 "nonimmediate_operand") | |
3227 | (match_operand:VF_128_256 2 "nonimmediate_operand")))] | |
3228 | "TARGET_SSE && <mask_avx512vl_condition>" | |
b6bc2701 | 3229 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
5802c0cb | 3230 | |
;; Expander for the any_logic operations on the 512-bit floating-point
;; vector modes (VF_512); requires TARGET_AVX512F.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy on the operands.
0607f34b | 3231 | (define_expand "<code><mode>3<mask_name>" |
6a3f5f59 | 3232 | [(set (match_operand:VF_512 0 "register_operand") |
0607f34b | 3233 | (any_logic:VF_512 |
6a3f5f59 | 3234 | (match_operand:VF_512 1 "nonimmediate_operand") |
3235 | (match_operand:VF_512 2 "nonimmediate_operand")))] | |
3236 | "TARGET_AVX512F" | |
3237 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | |
3238 | ||
;; any_logic operation on the 128/256-bit floating-point vector modes.
;; Operand 1 is marked commutative ("%").  Alternative 0 is the two-operand
;; <logic> form (isa noavx); alternative 1 is the three-operand v<logic>
;; form.  The suffix is "ps" when the insn's mode attribute is V8SF or
;; V4SF, otherwise <ssemodesuffix>.  When <mask_applied> is set and
;; TARGET_AVX512DQ is not, the integer form vp<logic>[dq] is emitted.
;; The template is formatted into a function-static buffer that is returned.
0607f34b | 3239 | (define_insn "*<code><mode>3<mask_name>" |
3240 | [(set (match_operand:VF_128_256 0 "register_operand" "=x,v") | |
3241 | (any_logic:VF_128_256 | |
3242 | (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v") | |
3243 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))] | |
3244 | "TARGET_SSE && <mask_avx512vl_condition> | |
3245 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
6fc76bb0 | 3246 | { |
0607f34b | 3247 | static char buf[128]; |
596112aa | 3248 | const char *ops; |
3249 | const char *suffix; | |
3250 | ||
3251 | switch (get_attr_mode (insn)) | |
3252 | { | |
3253 | case MODE_V8SF: | |
3254 | case MODE_V4SF: | |
3255 | suffix = "ps"; | |
3256 | break; | |
3257 | default: | |
3258 | suffix = "<ssemodesuffix>"; | |
3259 | } | |
887423c0 | 3260 | |
3261 | switch (which_alternative) | |
3262 | { | |
3263 | case 0: | |
596112aa | 3264 | ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; |
887423c0 | 3265 | break; |
3266 | case 1: | |
0607f34b | 3267 | ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; |
887423c0 | 3268 | break; |
3269 | default: | |
3270 | gcc_unreachable (); | |
3271 | } | |
3272 | ||
0607f34b | 3273 | /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */ |
3274 | if (<mask_applied> && !TARGET_AVX512DQ) | |
6a3f5f59 | 3275 | { |
0607f34b | 3276 | suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; |
3277 | ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; | |
6a3f5f59 | 3278 | } |
3279 | ||
596112aa | 3280 | snprintf (buf, sizeof (buf), ops, suffix); |
887423c0 | 3281 | return buf; |
6fc76bb0 | 3282 | } |
887423c0 | 3283 | [(set_attr "isa" "noavx,avx") |
3284 | (set_attr "type" "sselog") | |
6a3f5f59 | 3285 | (set_attr "prefix" "orig,maybe_evex") |
596112aa | 3286 | (set (attr "mode") |
7d460314 | 3287 | (cond [(and (match_test "<MODE_SIZE> == 16") |
3288 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
596112aa | 3289 | (const_string "<ssePSmode>") |
3290 | (match_test "TARGET_AVX") | |
3291 | (const_string "<MODE>") | |
3292 | (match_test "optimize_function_for_size_p (cfun)") | |
3293 | (const_string "V4SF") | |
3294 | ] | |
3295 | (const_string "<MODE>")))]) | |
5802c0cb | 3296 | |
;; any_logic operation on the 512-bit floating-point vector modes (VF_512).
;; With TARGET_AVX512DQ the output is v<logic><ssemodesuffix>.  Without it,
;; when the mode is 64 bytes wide or <mask_applied> is set, the integer
;; form vp<logic>[dq] is used (DFmode elements -> "q", otherwise "d").
;; The template is formatted into a function-static buffer that is returned.
0607f34b | 3297 | (define_insn "*<code><mode>3<mask_name>" |
3298 | [(set (match_operand:VF_512 0 "register_operand" "=v") | |
3299 | (any_logic:VF_512 | |
3300 | (match_operand:VF_512 1 "nonimmediate_operand" "%v") | |
3301 | (match_operand:VF_512 2 "nonimmediate_operand" "vm")))] | |
3302 | "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
3303 | { | |
3304 | static char buf[128]; | |
3305 | const char *ops; | |
3306 | const char *suffix; | |
3307 | ||
3308 | suffix = "<ssemodesuffix>"; | |
3309 | ops = ""; | |
3310 | ||
3311 | /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */ | |
3312 | if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ) | |
3313 | { | |
3314 | suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; | |
3315 | ops = "p"; | |
3316 | } | |
3317 | ||
3318 | snprintf (buf, sizeof (buf), | |
3319 | "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}", | |
3320 | ops, suffix); | |
3321 | return buf; | |
3322 | } | |
3323 | [(set_attr "type" "sselog") | |
3324 | (set_attr "prefix" "evex") | |
3325 | (set_attr "mode" "<sseinsnmode>")]) | |
3326 | ||
;; copysign for the VF vector modes, expanded as three logic operations:
;;   op4 = ~mask & op1
;;   op5 =  mask & op2
;;   op0 =  op4 | op5
;; where mask (operand 3) comes from ix86_build_signbit_mask and operands 4
;; and 5 are fresh pseudo registers created in the preparation code.
3d86078b | 3327 | (define_expand "copysign<mode>3" |
ddb24cdb | 3328 | [(set (match_dup 4) |
887423c0 | 3329 | (and:VF |
3330 | (not:VF (match_dup 3)) | |
abd4f58b | 3331 | (match_operand:VF 1 "nonimmediate_operand"))) |
ddb24cdb | 3332 | (set (match_dup 5) |
887423c0 | 3333 | (and:VF (match_dup 3) |
abd4f58b | 3334 | (match_operand:VF 2 "nonimmediate_operand"))) |
3335 | (set (match_operand:VF 0 "register_operand") | |
887423c0 | 3336 | (ior:VF (match_dup 4) (match_dup 5)))] |
6fe5844b | 3337 | "TARGET_SSE" |
3d86078b | 3338 | { |
8cedf886 | 3339 | operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0); |
07ddf75c | 3340 | |
ddb24cdb | 3341 | operands[4] = gen_reg_rtx (<MODE>mode); |
07ddf75c | 3342 | operands[5] = gen_reg_rtx (<MODE>mode); |
3d86078b | 3343 | }) |
3344 | ||
8d1e0693 | 3345 | ;; Also define scalar versions. These are used for abs, neg, and |
cea27bec | 3346 | ;; conditional move. Using subregs into vector modes causes register |
8d1e0693 | 3347 | ;; allocation lossage. These patterns do not allow memory operands |
3348 | ;; because the native instructions read the full 128 bits. | |
3349 | ||
;; Scalar (MODEF) form of (~op1) & op2; all operands are registers.
;; Alternative 0 emits the two-operand "andn" form (isa noavx) and
;; alternative 1 the three-operand "vandn" form.  The suffix is "ps" when
;; the insn's mode attribute is V4SF, otherwise <ssevecmodesuffix>.
;; The template is formatted into a 32-byte function-static buffer.
841985a7 | 3350 | (define_insn "*andnot<mode>3" |
887423c0 | 3351 | [(set (match_operand:MODEF 0 "register_operand" "=x,x") |
2a466fea | 3352 | (and:MODEF |
3353 | (not:MODEF | |
887423c0 | 3354 | (match_operand:MODEF 1 "register_operand" "0,x")) |
3355 | (match_operand:MODEF 2 "register_operand" "x,x")))] | |
2a466fea | 3356 | "SSE_FLOAT_MODE_P (<MODE>mode)" |
6fc76bb0 | 3357 | { |
887423c0 | 3358 | static char buf[32]; |
596112aa | 3359 | const char *ops; |
887423c0 | 3360 | const char *suffix |
596112aa | 3361 | = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>"; |
3362 | ||
3363 | switch (which_alternative) | |
3364 | { | |
3365 | case 0: | |
3366 | ops = "andn%s\t{%%2, %%0|%%0, %%2}"; | |
3367 | break; | |
3368 | case 1: | |
3369 | ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; | |
3370 | break; | |
3371 | default: | |
3372 | gcc_unreachable (); | |
3373 | } | |
3374 | ||
3375 | snprintf (buf, sizeof (buf), ops, suffix); | |
3376 | return buf; | |
3377 | } | |
3378 | [(set_attr "isa" "noavx,avx") | |
3379 | (set_attr "type" "sselog") | |
3380 | (set_attr "prefix" "orig,vex") | |
3381 | (set (attr "mode") | |
7d460314 | 3382 | (cond [(and (match_test "<MODE_SIZE> == 16") |
3383 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
596112aa | 3384 | (const_string "V4SF") |
3385 | (match_test "TARGET_AVX") | |
3386 | (const_string "<ssevecmode>") | |
3387 | (match_test "optimize_function_for_size_p (cfun)") | |
3388 | (const_string "V4SF") | |
3389 | ] | |
3390 | (const_string "<ssevecmode>")))]) | |
3391 | ||
;; TFmode form of (~op1) & op2; operand 2 may be in memory.
;; The mnemonic is "andnps" when the insn's mode attribute is V4SF and
;; "pandn" otherwise; alternative 1 prepends "v" and uses three operands.
;; prefix_data16 is "1" only for alternative 0 in TI mode.
;; The template is formatted into a 32-byte function-static buffer.
3392 | (define_insn "*andnottf3" | |
3393 | [(set (match_operand:TF 0 "register_operand" "=x,x") | |
3394 | (and:TF | |
3395 | (not:TF (match_operand:TF 1 "register_operand" "0,x")) | |
3396 | (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))] | |
3397 | "TARGET_SSE" | |
3398 | { | |
3399 | static char buf[32]; | |
3400 | const char *ops; | |
3401 | const char *tmp | |
3402 | = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn"; | |
887423c0 | 3403 | |
3404 | switch (which_alternative) | |
3405 | { | |
3406 | case 0: | |
596112aa | 3407 | ops = "%s\t{%%2, %%0|%%0, %%2}"; |
887423c0 | 3408 | break; |
3409 | case 1: | |
596112aa | 3410 | ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; |
887423c0 | 3411 | break; |
3412 | default: | |
3413 | gcc_unreachable (); | |
3414 | } | |
3415 | ||
596112aa | 3416 | snprintf (buf, sizeof (buf), ops, tmp); |
887423c0 | 3417 | return buf; |
6fc76bb0 | 3418 | } |
887423c0 | 3419 | [(set_attr "isa" "noavx,avx") |
3420 | (set_attr "type" "sselog") | |
596112aa | 3421 | (set (attr "prefix_data16") |
3422 | (if_then_else | |
3423 | (and (eq_attr "alternative" "0") | |
3424 | (eq_attr "mode" "TI")) | |
3425 | (const_string "1") | |
3426 | (const_string "*"))) | |
887423c0 | 3427 | (set_attr "prefix" "orig,vex") |
596112aa | 3428 | (set (attr "mode") |
3429 | (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") | |
3430 | (const_string "V4SF") | |
3431 | (match_test "TARGET_AVX") | |
3432 | (const_string "TI") | |
3433 | (ior (not (match_test "TARGET_SSE2")) | |
3434 | (match_test "optimize_function_for_size_p (cfun)")) | |
3435 | (const_string "V4SF") | |
3436 | ] | |
3437 | (const_string "TI")))]) | |
ed30e0a6 | 3438 | |
;; Scalar (MODEF) any_logic operation; all operands are registers and
;; operand 1 is marked commutative ("%").  Alternative 0 emits the
;; two-operand <logic> form (isa noavx), alternative 1 the three-operand
;; v<logic> form.  The suffix is "ps" when the insn's mode attribute is
;; V4SF, otherwise <ssevecmodesuffix>.
;; The template is formatted into a 32-byte function-static buffer.
b6bc2701 | 3439 | (define_insn "*<code><mode>3" |
887423c0 | 3440 | [(set (match_operand:MODEF 0 "register_operand" "=x,x") |
5acb11ef | 3441 | (any_logic:MODEF |
887423c0 | 3442 | (match_operand:MODEF 1 "register_operand" "%0,x") |
3443 | (match_operand:MODEF 2 "register_operand" "x,x")))] | |
2a466fea | 3444 | "SSE_FLOAT_MODE_P (<MODE>mode)" |
6fc76bb0 | 3445 | { |
887423c0 | 3446 | static char buf[32]; |
596112aa | 3447 | const char *ops; |
887423c0 | 3448 | const char *suffix |
596112aa | 3449 | = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>"; |
3450 | ||
3451 | switch (which_alternative) | |
3452 | { | |
3453 | case 0: | |
3454 | ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; | |
3455 | break; | |
3456 | case 1: | |
3457 | ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; | |
3458 | break; | |
3459 | default: | |
3460 | gcc_unreachable (); | |
3461 | } | |
3462 | ||
3463 | snprintf (buf, sizeof (buf), ops, suffix); | |
3464 | return buf; | |
3465 | } | |
3466 | [(set_attr "isa" "noavx,avx") | |
3467 | (set_attr "type" "sselog") | |
3468 | (set_attr "prefix" "orig,vex") | |
3469 | (set (attr "mode") | |
7d460314 | 3470 | (cond [(and (match_test "<MODE_SIZE> == 16") |
3471 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
596112aa | 3472 | (const_string "V4SF") |
3473 | (match_test "TARGET_AVX") | |
3474 | (const_string "<ssevecmode>") | |
3475 | (match_test "optimize_function_for_size_p (cfun)") | |
3476 | (const_string "V4SF") | |
3477 | ] | |
3478 | (const_string "<ssevecmode>")))]) | |
3479 | ||
;; Expander for the any_logic operations in TFmode; requires TARGET_SSE.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy on
;; the operands before the template is emitted.
3480 | (define_expand "<code>tf3" | |
3481 | [(set (match_operand:TF 0 "register_operand") | |
3482 | (any_logic:TF | |
3483 | (match_operand:TF 1 "nonimmediate_operand") | |
3484 | (match_operand:TF 2 "nonimmediate_operand")))] | |
3485 | "TARGET_SSE" | |
3486 | "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") | |
3487 | ||
;; TFmode any_logic operation.  The mnemonic is "<logic>ps" when the insn's
;; mode attribute is V4SF and "p<logic>" otherwise; alternative 1 prepends
;; "v" and uses three operands.  prefix_data16 is "1" only for alternative
;; 0 in TI mode.
;; The template is formatted into a 32-byte function-static buffer.
3488 | (define_insn "*<code>tf3" | |
3489 | [(set (match_operand:TF 0 "register_operand" "=x,x") | |
3490 | (any_logic:TF | |
3491 | (match_operand:TF 1 "nonimmediate_operand" "%0,x") | |
3492 | (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))] | |
3493 | "TARGET_SSE | |
3494 | && ix86_binary_operator_ok (<CODE>, TFmode, operands)" | |
3495 | { | |
3496 | static char buf[32]; | |
3497 | const char *ops; | |
3498 | const char *tmp | |
3499 | = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>"; | |
887423c0 | 3500 | |
3501 | switch (which_alternative) | |
3502 | { | |
3503 | case 0: | |
596112aa | 3504 | ops = "%s\t{%%2, %%0|%%0, %%2}"; |
887423c0 | 3505 | break; |
3506 | case 1: | |
596112aa | 3507 | ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; |
887423c0 | 3508 | break; |
3509 | default: | |
3510 | gcc_unreachable (); | |
3511 | } | |
3512 | ||
596112aa | 3513 | snprintf (buf, sizeof (buf), ops, tmp); |
887423c0 | 3514 | return buf; |
6fc76bb0 | 3515 | } |
887423c0 | 3516 | [(set_attr "isa" "noavx,avx") |
3517 | (set_attr "type" "sselog") | |
596112aa | 3518 | (set (attr "prefix_data16") |
3519 | (if_then_else | |
3520 | (and (eq_attr "alternative" "0") | |
3521 | (eq_attr "mode" "TI")) | |
3522 | (const_string "1") | |
3523 | (const_string "*"))) | |
887423c0 | 3524 | (set_attr "prefix" "orig,vex") |
596112aa | 3525 | (set (attr "mode") |
3526 | (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") | |
3527 | (const_string "V4SF") | |
3528 | (match_test "TARGET_AVX") | |
3529 | (const_string "TI") | |
3530 | (ior (not (match_test "TARGET_SSE2")) | |
3531 | (match_test "optimize_function_for_size_p (cfun)")) | |
3532 | (const_string "V4SF") | |
3533 | ] | |
3534 | (const_string "TI")))]) | |
8d1e0693 | 3535 | |
2f212aae | 3536 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
3537 | ;; | |
35811e21 | 3538 | ;; FMA floating point multiply/accumulate instructions. These include |
3539 | ;; scalar versions of the instructions as well as vector versions. | |
2f212aae | 3540 | ;; |
3541 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
3542 | ||
21a6219e | 3543 | ;; The standard names for scalar FMA are only available with SSE math enabled. |
c298e021 | 3544 | ;; The AVX512F CPUID bit enables EVEX-encoded scalar and 512-bit FMA. It does |
3545 | ;; not depend on the FMA bit, so we enable FMA for TARGET_AVX512F even when | |
3546 | ;; TARGET_FMA and TARGET_FMA4 are both false. | |
3547 | ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA | |
3548 | ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve | |
3549 | ;; GAS to allow proper prefix selection. However, for the moment all hardware | |
3550 | ;; that supports AVX512F also supports FMA so we can ignore this for now. | |
;; Modes accepted by the fma/fms/fnma/fnms expanders that use FMAMODEM,
;; each paired with its enabling condition.  The scalar SF/DF entries
;; additionally require TARGET_SSE_MATH.
3551 | (define_mode_iterator FMAMODEM | |
3552 | [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)") | |
3553 | (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)") | |
4f3da779 | 3554 | (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") |
3555 | (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3556 | (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3557 | (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
c298e021 | 3558 | (V16SF "TARGET_AVX512F") |
3559 | (V8DF "TARGET_AVX512F")]) | |
35811e21 | 3560 | |
;; fma: op0 = fma (op1, op2, op3).  The expander has no condition string
;; or preparation code of its own; enabling conditions come from the
;; FMAMODEM iterator entries.
a2f9d5b3 | 3561 | (define_expand "fma<mode>4" |
21a6219e | 3562 | [(set (match_operand:FMAMODEM 0 "register_operand") |
3563 | (fma:FMAMODEM | |
3564 | (match_operand:FMAMODEM 1 "nonimmediate_operand") | |
3565 | (match_operand:FMAMODEM 2 "nonimmediate_operand") | |
8211f5a2 | 3566 | (match_operand:FMAMODEM 3 "nonimmediate_operand")))]) |
5e2b6fd0 | 3567 | |
;; fms: op0 = fma (op1, op2, -op3).  The expander has no condition string
;; or preparation code of its own; enabling conditions come from the
;; FMAMODEM iterator entries.
b9be572e | 3568 | (define_expand "fms<mode>4" |
21a6219e | 3569 | [(set (match_operand:FMAMODEM 0 "register_operand") |
3570 | (fma:FMAMODEM | |
3571 | (match_operand:FMAMODEM 1 "nonimmediate_operand") | |
3572 | (match_operand:FMAMODEM 2 "nonimmediate_operand") | |
8211f5a2 | 3573 | (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]) |
b9be572e | 3574 | |
;; fnma: op0 = fma (-op1, op2, op3).  The expander has no condition string
;; or preparation code of its own; enabling conditions come from the
;; FMAMODEM iterator entries.
3575 | (define_expand "fnma<mode>4" | |
21a6219e | 3576 | [(set (match_operand:FMAMODEM 0 "register_operand") |
3577 | (fma:FMAMODEM | |
3578 | (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) | |
3579 | (match_operand:FMAMODEM 2 "nonimmediate_operand") | |
8211f5a2 | 3580 | (match_operand:FMAMODEM 3 "nonimmediate_operand")))]) |
b9be572e | 3581 | |
;; fnms: op0 = fma (-op1, op2, -op3).  The expander has no condition string
;; or preparation code of its own; enabling conditions come from the
;; FMAMODEM iterator entries.
3582 | (define_expand "fnms<mode>4" | |
21a6219e | 3583 | [(set (match_operand:FMAMODEM 0 "register_operand") |
3584 | (fma:FMAMODEM | |
3585 | (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) | |
3586 | (match_operand:FMAMODEM 2 "nonimmediate_operand") | |
8211f5a2 | 3587 | (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]) |
21a6219e | 3588 | |
3589 | ;; The builtins for intrinsics are not constrained by SSE math enabled. | |
;; Same mode list as FMAMODEM, but the scalar SF/DF entries do not require
;; TARGET_SSE_MATH.
4f3da779 | 3590 | (define_mode_iterator FMAMODE_AVX512 |
3591 | [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") | |
3592 | (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") | |
3593 | (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3594 | (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3595 | (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3596 | (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3597 | (V16SF "TARGET_AVX512F") | |
3598 | (V8DF "TARGET_AVX512F")]) | |
3599 | ||
;; Scalar and 128/256-bit floating-point modes, with no per-mode
;; conditions; patterns using it supply their own enabling condition.
8211f5a2 | 3600 | (define_mode_iterator FMAMODE |
4f3da779 | 3601 | [SF DF V4SF V2DF V8SF V4DF]) |
b9be572e | 3602 | |
;; Named expander producing op0 = fma (op1, op2, op3) over the
;; FMAMODE_AVX512 modes.  It has no condition string or preparation code of
;; its own; enabling conditions come from the iterator entries.
5e2b6fd0 | 3603 | (define_expand "fma4i_fmadd_<mode>" |
4f3da779 | 3604 | [(set (match_operand:FMAMODE_AVX512 0 "register_operand") |
3605 | (fma:FMAMODE_AVX512 | |
3606 | (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand") | |
3607 | (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand") | |
3608 | (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))]) | |
3609 | ||
;; Expander for the "maskz" form of fmadd.  It takes the destination,
;; the three fma inputs and a mask register (operand 4), and forwards
;; them to gen_fma_fmadd_<mode>_maskz_1 together with a zero vector
;; (CONST0_RTX) inserted before the mask argument.
3610 | (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>" | |
3611 | [(match_operand:VF_AVX512VL 0 "register_operand") | |
3612 | (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") | |
3613 | (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") | |
3614 | (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") | |
be60ab96 | 3615 | (match_operand:<avx512fmaskmode> 4 "register_operand")] |
4f3da779 | 3616 | "TARGET_AVX512F && <round_mode512bit_condition>" |
be60ab96 | 3617 | { |
adf45678 | 3618 | emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> ( |
be60ab96 | 3619 | operands[0], operands[1], operands[2], operands[3], |
adf45678 | 3620 | CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); |
be60ab96 | 3621 | DONE; |
3622 | }) | |
3623 | ||
;; Plain fused multiply-add, operand 0 = fma (op1, op2, op3), for the
;; FMAMODE modes.  Alternatives 0-2 are the three-operand FMA3 forms
;; (132/213/231, "isa" fma) with the destination tied to an input;
;; alternatives 3-4 are the four-operand FMA4 form ("isa" fma4).
4f3da779 | 3624 | (define_insn "*fma_fmadd_<mode>" |
3625 | [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") | |
3626 | (fma:FMAMODE | |
3627 | (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x") | |
3628 | (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3629 | (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))] | |
3630 | "TARGET_FMA || TARGET_FMA4" | |
be60ab96 | 3631 | "@ |
4f3da779 | 3632 | vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3633 | vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3634 | vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
2743953b | 3635 | vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3636 | vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
4f3da779 | 3637 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
2743953b | 3638 | (set_attr "type" "ssemuladd") |
35811e21 | 3639 | (set_attr "mode" "<MODE>")]) |
3640 | ||
4f3da779 | 3641 | ;; Assume AVX-512F as the baseline. |
;; Mode iterator over SF/DF and the float vector modes: the scalar and
;; 512-bit modes are unconditional here, the 128/256-bit vector modes
;; additionally require TARGET_AVX512VL.
3642 | (define_mode_iterator VF_SF_AVX512VL | |
3643 | [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") | |
3644 | DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
3645 | ||
;; AVX-512 fused multiply-add over VF_SF_AVX512VL, in the three FMA3
;; operand orders (132/213/231).  The pattern name, predicates,
;; constraints, condition and output template are parameterised by the
;; sd_mask and round subst attributes.
8211f5a2 | 3646 | (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" |
4f3da779 | 3647 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3648 | (fma:VF_SF_AVX512VL | |
3649 | (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") | |
3650 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3651 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))] | |
3652 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
8211f5a2 | 3653 | "@ |
3654 | vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3655 | vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3656 | vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
4f3da779 | 3657 | [(set_attr "type" "ssemuladd") |
8211f5a2 | 3658 | (set_attr "mode" "<MODE>")]) |
3659 | ||
;; Merge-masked fused multiply-add: vec_merge of fma (op1, op2, op3)
;; with operand 1 under mask register operand 4.  Operand 1 is tied to
;; the destination ("0"), so only the 132 and 213 forms are offered.
4f3da779 | 3660 | (define_insn "<avx512>_fmadd_<mode>_mask<round_name>" |
3661 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3662 | (vec_merge:VF_AVX512VL | |
3663 | (fma:VF_AVX512VL | |
3664 | (match_operand:VF_AVX512VL 1 "register_operand" "0,0") | |
3665 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3666 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")) | |
5220cab6 | 3667 | (match_dup 1) |
a31e7f46 | 3668 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
4f3da779 | 3669 | "TARGET_AVX512F && <round_mode512bit_condition>" |
5220cab6 | 3670 | "@ |
be60ab96 | 3671 | vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3672 | vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
5220cab6 | 3673 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3674 | (set_attr "type" "ssemuladd") | |
3675 | (set_attr "mode" "<MODE>")]) | |
3676 | ||
;; Merge-masked fused multiply-add, "mask3" form: vec_merge of
;; fma (op1, op2, op3) with operand 3 under mask register operand 4.
;; Operand 3 is tied to the destination ("0"), hence the 231 form.
;; Operands 0 and 1 use the "v" constraint like the other *_mask3
;; patterns in this file, and the condition includes
;; <round_mode512bit_condition> like <avx512>_fmadd_<mode>_mask.
4f3da779 | 3677 | (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>" |
3678 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3679 | (vec_merge:VF_AVX512VL | |
3680 | (fma:VF_AVX512VL | |
3681 | (match_operand:VF_AVX512VL 1 "register_operand" "v") | |
3682 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3683 | (match_operand:VF_AVX512VL 3 "register_operand" "0")) | |
5220cab6 | 3684 | (match_dup 3) |
a31e7f46 | 3685 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
5220cab6 | 3686 | "TARGET_AVX512F && <round_mode512bit_condition>" |
be60ab96 | 3687 | "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
5220cab6 | 3688 | [(set_attr "isa" "fma_avx512f") |
3689 | (set_attr "type" "ssemuladd") | |
3690 | (set_attr "mode" "<MODE>")]) | |
3691 | ||
;; Plain fused multiply-subtract, operand 0 = fma (op1, op2, -op3),
;; for the FMAMODE modes.  Alternatives 0-2 are the FMA3 132/213/231
;; forms ("isa" fma); alternatives 3-4 are the four-operand FMA4 form.
4f3da779 | 3692 | (define_insn "*fma_fmsub_<mode>" |
3693 | [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") | |
3694 | (fma:FMAMODE | |
3695 | (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x") | |
3696 | (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3697 | (neg:FMAMODE | |
3698 | (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))] | |
3699 | "TARGET_FMA || TARGET_FMA4" | |
35811e21 | 3700 | "@ |
4f3da779 | 3701 | vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3702 | vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3703 | vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
2743953b | 3704 | vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3705 | vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
4f3da779 | 3706 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
2743953b | 3707 | (set_attr "type" "ssemuladd") |
35811e21 | 3708 | (set_attr "mode" "<MODE>")]) |
3709 | ||
;; AVX-512 fused multiply-subtract, fma (op1, op2, -op3), over
;; VF_SF_AVX512VL in the 132/213/231 operand orders.  Name, predicates,
;; constraints, condition and template are parameterised by the sd_mask
;; and round subst attributes.
8211f5a2 | 3710 | (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>" |
4f3da779 | 3711 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3712 | (fma:VF_SF_AVX512VL | |
3713 | (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") | |
3714 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3715 | (neg:VF_SF_AVX512VL | |
3716 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))] | |
3717 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
8211f5a2 | 3718 | "@ |
3719 | vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3720 | vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3721 | vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
4f3da779 | 3722 | [(set_attr "type" "ssemuladd") |
8211f5a2 | 3723 | (set_attr "mode" "<MODE>")]) |
3724 | ||
;; Merge-masked fused multiply-subtract: vec_merge of
;; fma (op1, op2, -op3) with operand 1 under mask register operand 4.
;; Operand 1 is tied to the destination ("0"), so only the 132 and 213
;; forms are offered.  The condition includes
;; <round_mode512bit_condition>, matching <avx512>_fmsub_<mode>_mask3
;; and the other *_mask patterns that carry <round_name>.
4f3da779 | 3725 | (define_insn "<avx512>_fmsub_<mode>_mask<round_name>" |
3726 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3727 | (vec_merge:VF_AVX512VL | |
3728 | (fma:VF_AVX512VL | |
3729 | (match_operand:VF_AVX512VL 1 "register_operand" "0,0") | |
3730 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3731 | (neg:VF_AVX512VL | |
3732 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))) | |
5220cab6 | 3733 | (match_dup 1) |
a31e7f46 | 3734 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
5220cab6 | 3735 | "TARGET_AVX512F && <round_mode512bit_condition>" |
3736 | "@ | |
be60ab96 | 3737 | vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3738 | vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
5220cab6 | 3739 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3740 | (set_attr "type" "ssemuladd") | |
3741 | (set_attr "mode" "<MODE>")]) | |
3742 | ||
;; Merge-masked fused multiply-subtract, "mask3" form: vec_merge of
;; fma (op1, op2, -op3) with operand 3 under mask register operand 4.
;; Operand 3 is tied to the destination ("0"), hence the 231 form.
4f3da779 | 3743 | (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>" |
3744 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3745 | (vec_merge:VF_AVX512VL | |
3746 | (fma:VF_AVX512VL | |
3747 | (match_operand:VF_AVX512VL 1 "register_operand" "v") | |
3748 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3749 | (neg:VF_AVX512VL | |
3750 | (match_operand:VF_AVX512VL 3 "register_operand" "0"))) | |
5220cab6 | 3751 | (match_dup 3) |
a31e7f46 | 3752 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
4f3da779 | 3753 | "TARGET_AVX512F && <round_mode512bit_condition>" |
be60ab96 | 3754 | "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
5220cab6 | 3755 | [(set_attr "isa" "fma_avx512f") |
3756 | (set_attr "type" "ssemuladd") | |
3757 | (set_attr "mode" "<MODE>")]) | |
3758 | ||
;; Plain negated fused multiply-add, operand 0 = fma (-op1, op2, op3),
;; for the FMAMODE modes.  Alternatives 0-2 are the FMA3 132/213/231
;; forms ("isa" fma); alternatives 3-4 are the four-operand FMA4 form.
4f3da779 | 3759 | (define_insn "*fma_fnmadd_<mode>" |
3760 | [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") | |
3761 | (fma:FMAMODE | |
3762 | (neg:FMAMODE | |
3763 | (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")) | |
3764 | (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3765 | (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))] | |
3766 | "TARGET_FMA || TARGET_FMA4" | |
be60ab96 | 3767 | "@ |
4f3da779 | 3768 | vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3769 | vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3770 | vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
2743953b | 3771 | vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3772 | vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
4f3da779 | 3773 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
2743953b | 3774 | (set_attr "type" "ssemuladd") |
35811e21 | 3775 | (set_attr "mode" "<MODE>")]) |
3776 | ||
;; AVX-512 negated fused multiply-add, fma (-op1, op2, op3), over
;; VF_SF_AVX512VL in the 132/213/231 operand orders.  Name, predicates,
;; constraints, condition and template are parameterised by the sd_mask
;; and round subst attributes.
8211f5a2 | 3777 | (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>" |
4f3da779 | 3778 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3779 | (fma:VF_SF_AVX512VL | |
3780 | (neg:VF_SF_AVX512VL | |
3781 | (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")) | |
3782 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3783 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))] | |
3784 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
8211f5a2 | 3785 | "@ |
3786 | vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3787 | vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3788 | vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
4f3da779 | 3789 | [(set_attr "type" "ssemuladd") |
8211f5a2 | 3790 | (set_attr "mode" "<MODE>")]) |
3791 | ||
;; Merge-masked negated fused multiply-add: vec_merge of
;; fma (-op1, op2, op3) with operand 1 under mask register operand 4.
;; Operand 1 is tied to the destination ("0"), so only the 132 and 213
;; forms are offered.
4f3da779 | 3792 | (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>" |
3793 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3794 | (vec_merge:VF_AVX512VL | |
3795 | (fma:VF_AVX512VL | |
3796 | (neg:VF_AVX512VL | |
3797 | (match_operand:VF_AVX512VL 1 "register_operand" "0,0")) | |
3798 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3799 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")) | |
5220cab6 | 3800 | (match_dup 1) |
a31e7f46 | 3801 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
4f3da779 | 3802 | "TARGET_AVX512F && <round_mode512bit_condition>" |
5220cab6 | 3803 | "@ |
be60ab96 | 3804 | vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3805 | vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
5220cab6 | 3806 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3807 | (set_attr "type" "ssemuladd") | |
3808 | (set_attr "mode" "<MODE>")]) | |
3809 | ||
;; Merge-masked negated fused multiply-add, "mask3" form: vec_merge of
;; fma (-op1, op2, op3) with operand 3 under mask register operand 4.
;; Operand 3 is tied to the destination ("0"), hence the 231 form.
4f3da779 | 3810 | (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>" |
3811 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3812 | (vec_merge:VF_AVX512VL | |
3813 | (fma:VF_AVX512VL | |
3814 | (neg:VF_AVX512VL | |
3815 | (match_operand:VF_AVX512VL 1 "register_operand" "v")) | |
3816 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3817 | (match_operand:VF_AVX512VL 3 "register_operand" "0")) | |
5220cab6 | 3818 | (match_dup 3) |
a31e7f46 | 3819 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
4f3da779 | 3820 | "TARGET_AVX512F && <round_mode512bit_condition>" |
be60ab96 | 3821 | "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
5220cab6 | 3822 | [(set_attr "isa" "fma_avx512f") |
3823 | (set_attr "type" "ssemuladd") | |
3824 | (set_attr "mode" "<MODE>")]) | |
3825 | ||
;; Plain negated fused multiply-subtract, operand 0 =
;; fma (-op1, op2, -op3), for the FMAMODE modes.  Alternatives 0-2 are
;; the FMA3 132/213/231 forms ("isa" fma); alternatives 3-4 are the
;; four-operand FMA4 form ("isa" fma4).  This is the non-masked,
;; non-rounding pattern, so its templates carry no sd_mask/round subst
;; attributes, exactly like *fma_fmadd, *fma_fmsub and *fma_fnmadd.
4f3da779 | 3826 | (define_insn "*fma_fnmsub_<mode>" |
3827 | [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") | |
3828 | (fma:FMAMODE | |
3829 | (neg:FMAMODE | |
3830 | (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")) | |
3831 | (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3832 | (neg:FMAMODE | |
3833 | (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))] | |
3834 | "TARGET_FMA || TARGET_FMA4" | |
35811e21 | 3835 | "@ |
be60ab96 | 3836 | vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3837 | vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3838 | vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
2743953b | 3839 | vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3840 | vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
4f3da779 | 3841 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
2743953b | 3842 | (set_attr "type" "ssemuladd") |
2f212aae | 3843 | (set_attr "mode" "<MODE>")]) |
3844 | ||
;; AVX-512 negated fused multiply-subtract, fma (-op1, op2, -op3), over
;; VF_SF_AVX512VL in the 132/213/231 operand orders.  Name, predicates,
;; constraints, condition and template are parameterised by the sd_mask
;; and round subst attributes.
8211f5a2 | 3845 | (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>" |
4f3da779 | 3846 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3847 | (fma:VF_SF_AVX512VL | |
3848 | (neg:VF_SF_AVX512VL | |
3849 | (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")) | |
3850 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3851 | (neg:VF_SF_AVX512VL | |
3852 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))] | |
3853 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
8211f5a2 | 3854 | "@ |
3855 | vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3856 | vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3857 | vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
4f3da779 | 3858 | [(set_attr "type" "ssemuladd") |
8211f5a2 | 3859 | (set_attr "mode" "<MODE>")]) |
3860 | ||
;; Merge-masked negated fused multiply-subtract: vec_merge of
;; fma (-op1, op2, -op3) with operand 1 under mask register operand 4.
;; Operand 1 is tied to the destination ("0"), so only the 132 and 213
;; forms are offered.
4f3da779 | 3861 | (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>" |
3862 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3863 | (vec_merge:VF_AVX512VL | |
3864 | (fma:VF_AVX512VL | |
3865 | (neg:VF_AVX512VL | |
3866 | (match_operand:VF_AVX512VL 1 "register_operand" "0,0")) | |
3867 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3868 | (neg:VF_AVX512VL | |
3869 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))) | |
5220cab6 | 3870 | (match_dup 1) |
a31e7f46 | 3871 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
4f3da779 | 3872 | "TARGET_AVX512F && <round_mode512bit_condition>" |
5220cab6 | 3873 | "@ |
be60ab96 | 3874 | vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3875 | vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
5220cab6 | 3876 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3877 | (set_attr "type" "ssemuladd") | |
3878 | (set_attr "mode" "<MODE>")]) | |
3879 | ||
;; Merge-masked negated fused multiply-subtract, "mask3" form:
;; vec_merge of fma (-op1, op2, -op3) with operand 3 under mask
;; register operand 4.  Operand 3 is tied to the destination ("0"),
;; hence the 231 form.  The condition includes
;; <round_mode512bit_condition>, matching <avx512>_fnmsub_<mode>_mask.
4f3da779 | 3880 | (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>" |
3881 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3882 | (vec_merge:VF_AVX512VL | |
3883 | (fma:VF_AVX512VL | |
3884 | (neg:VF_AVX512VL | |
3885 | (match_operand:VF_AVX512VL 1 "register_operand" "v")) | |
3886 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3887 | (neg:VF_AVX512VL | |
3888 | (match_operand:VF_AVX512VL 3 "register_operand" "0"))) | |
5220cab6 | 3889 | (match_dup 3) |
a31e7f46 | 3890 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
5220cab6 | 3891 | "TARGET_AVX512F && <round_mode512bit_condition>" |
be60ab96 | 3892 | "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
5220cab6 | 3893 | [(set_attr "isa" "fma_avx512f") |
3894 | (set_attr "type" "ssemuladd") | |
3895 | (set_attr "mode" "<MODE>")]) | |
3896 | ||
35811e21 | 3897 | ;; FMA parallel floating point multiply addsub and subadd operations. |
a2f9d5b3 | 3898 | |
35811e21 | 3899 | ;; It would be possible to represent these without the UNSPEC as |
3900 | ;; | |
3901 | ;; (vec_merge | |
3902 | ;; (fma op1 op2 op3) | |
3903 | ;; (fma op1 op2 (neg op3)) | |
3904 | ;; (merge-const)) | |
3905 | ;; | |
3906 | ;; But this doesn't seem useful in practice. | |
3907 | ||
;; Expander "fmaddsub_<mode>" over the VF modes: operand 0 is set to
;; the UNSPEC_FMADDSUB unspec of operands 1, 2 and 3.  Enabled when any
;; of FMA, FMA4 or AVX512F is available.
3908 | (define_expand "fmaddsub_<mode>" | |
3909 | [(set (match_operand:VF 0 "register_operand") | |
3910 | (unspec:VF | |
3911 | [(match_operand:VF 1 "nonimmediate_operand") | |
3912 | (match_operand:VF 2 "nonimmediate_operand") | |
3913 | (match_operand:VF 3 "nonimmediate_operand")] | |
3914 | UNSPEC_FMADDSUB))] | |
c298e021 | 3915 | "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") |
35811e21 | 3916 | |
;; Expander for the "maskz" form of fmaddsub.  It takes the
;; destination, the three inputs and a mask register (operand 4), and
;; forwards them to gen_fma_fmaddsub_<mode>_maskz_1 together with a
;; zero vector (CONST0_RTX) inserted before the mask argument.
4f3da779 | 3917 | (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>" |
3918 | [(match_operand:VF_AVX512VL 0 "register_operand") | |
3919 | (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") | |
3920 | (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") | |
3921 | (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") | |
9a5ea1d5 | 3922 | (match_operand:<avx512fmaskmode> 4 "register_operand")] |
3923 | "TARGET_AVX512F" | |
3924 | { | |
adf45678 | 3925 | emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> ( |
9a5ea1d5 | 3926 | operands[0], operands[1], operands[2], operands[3], |
adf45678 | 3927 | CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); |
9a5ea1d5 | 3928 | DONE; |
3929 | }) | |
3930 | ||
;; Plain vfmaddsub for the 128/256-bit float vector modes, represented
;; as UNSPEC_FMADDSUB of operands 1, 2 and 3.  Alternatives 0-2 are the
;; FMA3 132/213/231 forms ("isa" fma); alternatives 3-4 are the
;; four-operand FMA4 form ("isa" fma4).
4f3da779 | 3931 | (define_insn "*fma_fmaddsub_<mode>" |
8211f5a2 | 3932 | [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x") |
3933 | (unspec:VF_128_256 | |
4f3da779 | 3934 | [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x") |
3935 | (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3936 | (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")] | |
35811e21 | 3937 | UNSPEC_FMADDSUB))] |
4f3da779 | 3938 | "TARGET_FMA || TARGET_FMA4" |
35811e21 | 3939 | "@ |
4f3da779 | 3940 | vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3941 | vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3942 | vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
2743953b | 3943 | vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3944 | vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
4f3da779 | 3945 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
2743953b | 3946 | (set_attr "type" "ssemuladd") |
35811e21 | 3947 | (set_attr "mode" "<MODE>")]) |
3948 | ||
;; AVX-512 vfmaddsub over VF_SF_AVX512VL, represented as
;; UNSPEC_FMADDSUB of operands 1, 2 and 3, in the 132/213/231 operand
;; orders.  Name, predicates, constraints, condition and template are
;; parameterised by the sd_mask and round subst attributes.
8211f5a2 | 3949 | (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" |
4f3da779 | 3950 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3951 | (unspec:VF_SF_AVX512VL | |
3952 | [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") | |
3953 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3954 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")] | |
8211f5a2 | 3955 | UNSPEC_FMADDSUB))] |
3956 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
3957 | "@ | |
3958 | vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3959 | vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3960 | vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
4f3da779 | 3961 | [(set_attr "type" "ssemuladd") |
8211f5a2 | 3962 | (set_attr "mode" "<MODE>")]) |
3963 | ||
;; Merge-masked vfmaddsub: vec_merge of UNSPEC_FMADDSUB (op1, op2, op3)
;; with operand 1 under mask register operand 4.  Operand 1 is tied to
;; the destination ("0"), so only the 132 and 213 forms are offered.
4f3da779 | 3964 | (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>" |
3965 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3966 | (vec_merge:VF_AVX512VL | |
3967 | (unspec:VF_AVX512VL | |
3968 | [(match_operand:VF_AVX512VL 1 "register_operand" "0,0") | |
3969 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3970 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")] | |
5220cab6 | 3971 | UNSPEC_FMADDSUB) |
3972 | (match_dup 1) | |
a31e7f46 | 3973 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
5220cab6 | 3974 | "TARGET_AVX512F" |
3975 | "@ | |
be60ab96 | 3976 | vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3977 | vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
5220cab6 | 3978 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3979 | (set_attr "type" "ssemuladd") | |
3980 | (set_attr "mode" "<MODE>")]) | |
3981 | ||
;; Merge-masked vfmaddsub, "mask3" form: vec_merge of
;; UNSPEC_FMADDSUB (op1, op2, op3) with operand 3 under mask register
;; operand 4.  Operand 3 is tied to the destination ("0"), hence the
;; 231 form.
4f3da779 | 3982 | (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>" |
3983 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3984 | (vec_merge:VF_AVX512VL | |
3985 | (unspec:VF_AVX512VL | |
3986 | [(match_operand:VF_AVX512VL 1 "register_operand" "v") | |
3987 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3988 | (match_operand:VF_AVX512VL 3 "register_operand" "0")] | |
5220cab6 | 3989 | UNSPEC_FMADDSUB) |
3990 | (match_dup 3) | |
a31e7f46 | 3991 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
5220cab6 | 3992 | "TARGET_AVX512F" |
be60ab96 | 3993 | "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
5220cab6 | 3994 | [(set_attr "isa" "fma_avx512f") |
3995 | (set_attr "type" "ssemuladd") | |
3996 | (set_attr "mode" "<MODE>")]) | |
3997 | ||
;; Plain vfmsubadd for the 128/256-bit float vector modes, represented
;; as UNSPEC_FMADDSUB with operand 3 negated.  Alternatives 0-2 are the
;; FMA3 132/213/231 forms ("isa" fma); alternatives 3-4 are the
;; four-operand FMA4 form ("isa" fma4).
4f3da779 | 3998 | (define_insn "*fma_fmsubadd_<mode>" |
8211f5a2 | 3999 | [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x") |
4000 | (unspec:VF_128_256 | |
4f3da779 | 4001 | [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x") |
4002 | (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
8211f5a2 | 4003 | (neg:VF_128_256 |
4f3da779 | 4004 | (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))] |
35811e21 | 4005 | UNSPEC_FMADDSUB))] |
4f3da779 | 4006 | "TARGET_FMA || TARGET_FMA4" |
35811e21 | 4007 | "@ |
4f3da779 | 4008 | vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
4009 | vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
4010 | vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
2743953b | 4011 | vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
4012 | vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
4f3da779 | 4013 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
2743953b | 4014 | (set_attr "type" "ssemuladd") |
35811e21 | 4015 | (set_attr "mode" "<MODE>")]) |
4016 | ||
;; AVX-512 vfmsubadd over VF_SF_AVX512VL, represented as
;; UNSPEC_FMADDSUB with operand 3 negated, in the 132/213/231 operand
;; orders.  Name, predicates, constraints, condition and template are
;; parameterised by the sd_mask and round subst attributes.
8211f5a2 | 4017 | (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" |
4f3da779 | 4018 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
4019 | (unspec:VF_SF_AVX512VL | |
4020 | [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") | |
4021 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
4022 | (neg:VF_SF_AVX512VL | |
4023 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))] | |
8211f5a2 | 4024 | UNSPEC_FMADDSUB))] |
4025 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
4026 | "@ | |
4027 | vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
4028 | vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
4029 | vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
4f3da779 | 4030 | [(set_attr "type" "ssemuladd") |
8211f5a2 | 4031 | (set_attr "mode" "<MODE>")]) |
4032 | ||
;; Merge-masked vfmsubadd: vec_merge of UNSPEC_FMADDSUB
;; (op1, op2, -op3) with operand 1 under mask register operand 4.
;; Operand 1 is tied to the destination ("0"), so only the 132 and 213
;; forms are offered.
4f3da779 | 4033 | (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>" |
4034 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
4035 | (vec_merge:VF_AVX512VL | |
4036 | (unspec:VF_AVX512VL | |
4037 | [(match_operand:VF_AVX512VL 1 "register_operand" "0,0") | |
4038 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
4039 | (neg:VF_AVX512VL | |
4040 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))] | |
5220cab6 | 4041 | UNSPEC_FMADDSUB) |
4042 | (match_dup 1) | |
a31e7f46 | 4043 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
5220cab6 | 4044 | "TARGET_AVX512F" |
4045 | "@ | |
be60ab96 | 4046 | vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
4047 | vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
5220cab6 | 4048 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
4049 | (set_attr "type" "ssemuladd") | |
4050 | (set_attr "mode" "<MODE>")]) | |
4051 | ||
;; Merge-masked vfmsubadd, "mask3" form: vec_merge of UNSPEC_FMADDSUB
;; (op1, op2, -op3) with operand 3 under mask register operand 4.
;; Operand 3 is tied to the destination ("0"), hence the 231 form.
4f3da779 | 4052 | (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>" |
4053 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
4054 | (vec_merge:VF_AVX512VL | |
4055 | (unspec:VF_AVX512VL | |
4056 | [(match_operand:VF_AVX512VL 1 "register_operand" "v") | |
4057 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
4058 | (neg:VF_AVX512VL | |
4059 | (match_operand:VF_AVX512VL 3 "register_operand" "0"))] | |
5220cab6 | 4060 | UNSPEC_FMADDSUB) |
4061 | (match_dup 3) | |
a31e7f46 | 4062 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
5220cab6 | 4063 | "TARGET_AVX512F" |
be60ab96 | 4064 | "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
5220cab6 | 4065 | [(set_attr "isa" "fma_avx512f") |
4066 | (set_attr "type" "ssemuladd") | |
4067 | (set_attr "mode" "<MODE>")]) | |
4068 | ||
35811e21 | 4069 | ;; FMA3 floating point scalar intrinsics. These merge result with |
4070 | ;; high-order elements from the destination register. | |
2f212aae | 4071 | |
;; Expander for the scalar FMA3 intrinsic form over the VF_128 modes:
;; element 0 of the result (vec_merge selector const_int 1) comes from
;; fma (op1, op2, op3), the remaining elements from operand 1.
be60ab96 | 4072 | (define_expand "fmai_vmfmadd_<mode><round_name>" |
65463cb8 | 4073 | [(set (match_operand:VF_128 0 "register_operand") |
4074 | (vec_merge:VF_128 | |
4075 | (fma:VF_128 | |
be60ab96 | 4076 | (match_operand:VF_128 1 "<round_nimm_predicate>") |
4077 | (match_operand:VF_128 2 "<round_nimm_predicate>") | |
4078 | (match_operand:VF_128 3 "<round_nimm_predicate>")) | |
092a264c | 4079 | (match_dup 1) |
65463cb8 | 4080 | (const_int 1)))] |
4081 | "TARGET_FMA") | |
4082 | ||
;; Scalar FMA3 multiply-add on VF_128: element 0 is fma (op1, op2, op3),
;; the other elements are taken from operand 1, which is tied to the
;; destination ("0").  Two alternatives: the 132 and 213 forms.
4083 | (define_insn "*fmai_fmadd_<mode>" | |
e13e1b39 | 4084 | [(set (match_operand:VF_128 0 "register_operand" "=v,v") |
65463cb8 | 4085 | (vec_merge:VF_128 |
4086 | (fma:VF_128 | |
be60ab96 | 4087 | (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0") |
4088 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v") | |
4089 | (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")) | |
092a264c | 4090 | (match_dup 1) |
65463cb8 | 4091 | (const_int 1)))] |
c298e021 | 4092 | "TARGET_FMA || TARGET_AVX512F" |
65463cb8 | 4093 | "@ |
be60ab96 | 4094 | vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} |
4095 | vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" | |
65463cb8 | 4096 | [(set_attr "type" "ssemuladd") |
4097 | (set_attr "mode" "<MODE>")]) | |
4098 | ||
;; Scalar FMA3 multiply-subtract on VF_128: element 0 is
;; fma (op1, op2, -op3), the other elements are taken from operand 1,
;; which is tied to the destination ("0").  Two alternatives: the 132
;; and 213 forms.  The name carries <round_name> because the pattern
;; uses the round subst attributes, matching *fmai_fnmadd_<mode> and
;; *fmai_fnmsub_<mode>.
4099 | (define_insn "*fmai_fmsub_<mode><round_name>" | |
e13e1b39 | 4100 | [(set (match_operand:VF_128 0 "register_operand" "=v,v") |
65463cb8 | 4101 | (vec_merge:VF_128 |
4102 | (fma:VF_128 | |
be60ab96 | 4103 | (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0") |
4104 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v") | |
65463cb8 | 4105 | (neg:VF_128 |
be60ab96 | 4106 | (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))) |
092a264c | 4107 | (match_dup 1) |
65463cb8 | 4108 | (const_int 1)))] |
c298e021 | 4109 | "TARGET_FMA || TARGET_AVX512F" |
65463cb8 | 4110 | "@ |
be60ab96 | 4111 | vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} |
4112 | vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" | |
65463cb8 | 4113 | [(set_attr "type" "ssemuladd") |
4114 | (set_attr "mode" "<MODE>")]) | |
4115 | ||
;; Scalar FMA3 negated multiply-add on VF_128: element 0 is
;; fma (-op2, op1, op3), the other elements are taken from operand 1,
;; which is tied to the destination ("0").  Note that the negated
;; multiplicand is operand 2 and operand 1 is the second fma input.
be60ab96 | 4116 | (define_insn "*fmai_fnmadd_<mode><round_name>" |
e13e1b39 | 4117 | [(set (match_operand:VF_128 0 "register_operand" "=v,v") |
65463cb8 | 4118 | (vec_merge:VF_128 |
4119 | (fma:VF_128 | |
4120 | (neg:VF_128 | |
be60ab96 | 4121 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")) |
4122 | (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0") | |
4123 | (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")) | |
092a264c | 4124 | (match_dup 1) |
65463cb8 | 4125 | (const_int 1)))] |
c298e021 | 4126 | "TARGET_FMA || TARGET_AVX512F" |
65463cb8 | 4127 | "@ |
be60ab96 | 4128 | vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} |
4129 | vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" | |
65463cb8 | 4130 | [(set_attr "type" "ssemuladd") |
4131 | (set_attr "mode" "<MODE>")]) | |
4132 | ||
;; Scalar FMA3 negated multiply-subtract on VF_128: element 0 is
;; fma (-op2, op1, -op3), the other elements are taken from operand 1,
;; which is tied to the destination ("0").  Note that the negated
;; multiplicand is operand 2 and operand 1 is the second fma input.
be60ab96 | 4133 | (define_insn "*fmai_fnmsub_<mode><round_name>" |
e13e1b39 | 4134 | [(set (match_operand:VF_128 0 "register_operand" "=v,v") |
65463cb8 | 4135 | (vec_merge:VF_128 |
4136 | (fma:VF_128 | |
4137 | (neg:VF_128 | |
be60ab96 | 4138 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")) |
4139 | (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0") | |
65463cb8 | 4140 | (neg:VF_128 |
be60ab96 | 4141 | (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))) |
092a264c | 4142 | (match_dup 1) |
65463cb8 | 4143 | (const_int 1)))] |
c298e021 | 4144 | "TARGET_FMA || TARGET_AVX512F" |
65463cb8 | 4145 | "@ |
be60ab96 | 4146 | vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} |
4147 | vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" | |
65463cb8 | 4148 | [(set_attr "type" "ssemuladd") |
4149 | (set_attr "mode" "<MODE>")]) | |
4150 | ||
35811e21 | 4151 | ;; FMA4 floating point scalar intrinsics. These write the |
4152 | ;; entire destination register, with the high-order elements zeroed. | |
4153 | ||
;; Expander for the scalar FMA4 intrinsic form over the VF_128 modes:
;; element 0 of the result is fma (op1, op2, op3); the remaining
;; elements come from operand 4, which the preparation statement sets
;; to the zero vector CONST0_RTX (<MODE>mode).
4154 | (define_expand "fma4i_vmfmadd_<mode>" | |
4155 | [(set (match_operand:VF_128 0 "register_operand") | |
4156 | (vec_merge:VF_128 | |
4157 | (fma:VF_128 | |
4158 | (match_operand:VF_128 1 "nonimmediate_operand") | |
4159 | (match_operand:VF_128 2 "nonimmediate_operand") | |
4160 | (match_operand:VF_128 3 "nonimmediate_operand")) | |
4161 | (match_dup 4) | |
4162 | (const_int 1)))] | |
4163 | "TARGET_FMA4" | |
4164 | "operands[4] = CONST0_RTX (<MODE>mode);") | |
4165 | ||
;; Scalar FMA4 multiply-add: fma (op1, op2, op3) merged with a
;; const0_operand under mask (const_int 1).  Operand 1 is marked
;; commutative ("%"); in each alternative at most one of operands 2 and 3
;; may be a memory operand.
a2f9d5b3 | 4166 | (define_insn "*fma4i_vmfmadd_<mode>" |
6fe5844b | 4167 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
4168 | (vec_merge:VF_128 | |
4169 | (fma:VF_128 | |
4170 | (match_operand:VF_128 1 "nonimmediate_operand" "%x,x") | |
4171 | (match_operand:VF_128 2 "nonimmediate_operand" " x,m") | |
4172 | (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")) | |
abd4f58b | 4173 | (match_operand:VF_128 4 "const0_operand") |
a2f9d5b3 | 4174 | (const_int 1)))] |
4175 | "TARGET_FMA4" | |
c358a059 | 4176 | "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}" |
2f212aae | 4177 | [(set_attr "type" "ssemuladd") |
4178 | (set_attr "mode" "<MODE>")]) | |
4179 | ||
;; Scalar FMA4 multiply-subtract: fma (op1, op2, (neg op3)) merged with a
;; const0_operand under mask (const_int 1).  Constraints mirror the
;; vmfmadd pattern: only one of operands 2 and 3 may be memory.
a2f9d5b3 | 4180 | (define_insn "*fma4i_vmfmsub_<mode>" |
6fe5844b | 4181 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
4182 | (vec_merge:VF_128 | |
4183 | (fma:VF_128 | |
4184 | (match_operand:VF_128 1 "nonimmediate_operand" "%x,x") | |
4185 | (match_operand:VF_128 2 "nonimmediate_operand" " x,m") | |
4186 | (neg:VF_128 | |
4187 | (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))) | |
abd4f58b | 4188 | (match_operand:VF_128 4 "const0_operand") |
a2f9d5b3 | 4189 | (const_int 1)))] |
4190 | "TARGET_FMA4" | |
c358a059 | 4191 | "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}" |
2f212aae | 4192 | [(set_attr "type" "ssemuladd") |
4193 | (set_attr "mode" "<MODE>")]) | |
4194 | ||
;; Scalar FMA4 negated multiply-add: fma ((neg op1), op2, op3) merged with
;; a const0_operand under mask (const_int 1).  Only one of operands 2 and
;; 3 may be memory in each alternative.
a2f9d5b3 | 4195 | (define_insn "*fma4i_vmfnmadd_<mode>" |
6fe5844b | 4196 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
4197 | (vec_merge:VF_128 | |
4198 | (fma:VF_128 | |
4199 | (neg:VF_128 | |
4200 | (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")) | |
4201 | (match_operand:VF_128 2 "nonimmediate_operand" " x,m") | |
4202 | (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")) | |
abd4f58b | 4203 | (match_operand:VF_128 4 "const0_operand") |
a2f9d5b3 | 4204 | (const_int 1)))] |
4205 | "TARGET_FMA4" | |
c358a059 | 4206 | "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}" |
2f212aae | 4207 | [(set_attr "type" "ssemuladd") |
4208 | (set_attr "mode" "<MODE>")]) | |
4209 | ||
;; Scalar FMA4 negated multiply-subtract: fma ((neg op1), op2, (neg op3))
;; merged with a const0_operand under mask (const_int 1).  Only one of
;; operands 2 and 3 may be memory in each alternative.
a2f9d5b3 | 4210 | (define_insn "*fma4i_vmfnmsub_<mode>" |
6fe5844b | 4211 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
4212 | (vec_merge:VF_128 | |
4213 | (fma:VF_128 | |
4214 | (neg:VF_128 | |
4215 | (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")) | |
4216 | (match_operand:VF_128 2 "nonimmediate_operand" " x,m") | |
4217 | (neg:VF_128 | |
4218 | (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))) | |
abd4f58b | 4219 | (match_operand:VF_128 4 "const0_operand") |
a2f9d5b3 | 4220 | (const_int 1)))] |
aaf9db06 | 4221 | "TARGET_FMA4" |
c358a059 | 4222 | "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}" |
2f212aae | 4223 | [(set_attr "type" "ssemuladd") |
4224 | (set_attr "mode" "<MODE>")]) | |
4225 | ||
5e2b6fd0 | 4226 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
5802c0cb | 4227 | ;; |
2a466fea | 4228 | ;; Parallel single-precision floating point conversion operations |
4229 | ;; | |
4230 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
4231 | ||
;; cvtpi2ps: convert V2SI operand 2 (constraint "ym") to V2SF and merge it
;; into V4SF operand 1 under mask (const_int 3).  Operand 1 is tied to the
;; destination ("0").
4232 | (define_insn "sse_cvtpi2ps" | |
4233 | [(set (match_operand:V4SF 0 "register_operand" "=x") | |
4234 | (vec_merge:V4SF | |
4235 | (vec_duplicate:V4SF | |
4236 | (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) | |
4237 | (match_operand:V4SF 1 "register_operand" "0") | |
4238 | (const_int 3)))] | |
4239 | "TARGET_SSE" | |
4240 | "cvtpi2ps\t{%2, %0|%0, %2}" | |
4241 | [(set_attr "type" "ssecvt") | |
4242 | (set_attr "mode" "V4SF")]) | |
4243 | ||
;; cvtps2pi: elements 0 and 1 of the UNSPEC_FIX_NOTRUNC conversion of V4SF
;; operand 1 are selected into V2SI operand 0 (constraint "y").  The
;; Intel-dialect source operand is printed with the %q modifier.
4244 | (define_insn "sse_cvtps2pi" | |
4245 | [(set (match_operand:V2SI 0 "register_operand" "=y") | |
4246 | (vec_select:V2SI | |
4247 | (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] | |
4248 | UNSPEC_FIX_NOTRUNC) | |
4249 | (parallel [(const_int 0) (const_int 1)])))] | |
4250 | "TARGET_SSE" | |
c358a059 | 4251 | "cvtps2pi\t{%1, %0|%0, %q1}" |
2a466fea | 4252 | [(set_attr "type" "ssecvt") |
4253 | (set_attr "unit" "mmx") | |
4254 | (set_attr "mode" "DI")]) | |
4255 | ||
;; cvttps2pi: same shape as sse_cvtps2pi, but the conversion is expressed
;; with the RTL fix code instead of UNSPEC_FIX_NOTRUNC.  Elements 0 and 1
;; of the V4SI result are selected into V2SI operand 0.
4256 | (define_insn "sse_cvttps2pi" | |
4257 | [(set (match_operand:V2SI 0 "register_operand" "=y") | |
4258 | (vec_select:V2SI | |
4259 | (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) | |
4260 | (parallel [(const_int 0) (const_int 1)])))] | |
4261 | "TARGET_SSE" | |
c358a059 | 4262 | "cvttps2pi\t{%1, %0|%0, %q1}" |
2a466fea | 4263 | [(set_attr "type" "ssecvt") |
4264 | (set_attr "unit" "mmx") | |
00a0e418 | 4265 | (set_attr "prefix_rep" "0") |
2a466fea | 4266 | (set_attr "mode" "SF")]) |
4267 | ||
;; cvtsi2ss: convert SImode operand 2 to SF and merge it into V4SF operand
;; 1 under mask (const_int 1).  Alternatives 0 and 1 (isa "noavx") are the
;; two-operand forms with a register or memory source and operand 1 tied
;; to the destination; alternative 2 (isa "avx") is the three-operand
;; vcvtsi2ss form, which also carries <round_op3>.
be60ab96 | 4268 | (define_insn "sse_cvtsi2ss<round_name>" |
e13e1b39 | 4269 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") |
2a466fea | 4270 | (vec_merge:V4SF |
4271 | (vec_duplicate:V4SF | |
be60ab96 | 4272 | (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>"))) |
e13e1b39 | 4273 | (match_operand:V4SF 1 "register_operand" "0,0,v") |
2a466fea | 4274 | (const_int 1)))] |
4275 | "TARGET_SSE" | |
887423c0 | 4276 | "@ |
4277 | cvtsi2ss\t{%2, %0|%0, %2} | |
4278 | cvtsi2ss\t{%2, %0|%0, %2} | |
88048095 | 4279 | vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" |
887423c0 | 4280 | [(set_attr "isa" "noavx,noavx,avx") |
4281 | (set_attr "type" "sseicvt") | |
4282 | (set_attr "athlon_decode" "vector,double,*") | |
4283 | (set_attr "amdfam10_decode" "vector,double,*") | |
4284 | (set_attr "bdver1_decode" "double,direct,*") | |
6470d004 | 4285 | (set_attr "btver2_decode" "double,double,double") |
4c9faaa4 | 4286 | (set_attr "znver1_decode" "double,double,double") |
4c1099de | 4287 | (set_attr "prefix" "orig,orig,maybe_evex") |
ed30e0a6 | 4288 | (set_attr "mode" "SF")]) |
4289 | ||
;; cvtsi2ssq: DImode-source variant of sse_cvtsi2ss, enabled only with
;; TARGET_64BIT.  Converts operand 2 to SF and merges it into V4SF operand
;; 1 under mask (const_int 1).  The noavx alternatives set prefix_rex to
;; "1"; the avx alternative sets length_vex to "4".
be60ab96 | 4290 | (define_insn "sse_cvtsi2ssq<round_name>" |
e13e1b39 | 4291 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") |
2a466fea | 4292 | (vec_merge:V4SF |
4293 | (vec_duplicate:V4SF | |
be60ab96 | 4294 | (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>"))) |
e13e1b39 | 4295 | (match_operand:V4SF 1 "register_operand" "0,0,v") |
2a466fea | 4296 | (const_int 1)))] |
4297 | "TARGET_SSE && TARGET_64BIT" | |
887423c0 | 4298 | "@ |
4299 | cvtsi2ssq\t{%2, %0|%0, %2} | |
4300 | cvtsi2ssq\t{%2, %0|%0, %2} | |
88048095 | 4301 | vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" |
887423c0 | 4302 | [(set_attr "isa" "noavx,noavx,avx") |
4303 | (set_attr "type" "sseicvt") | |
4304 | (set_attr "athlon_decode" "vector,double,*") | |
4305 | (set_attr "amdfam10_decode" "vector,double,*") | |
4306 | (set_attr "bdver1_decode" "double,direct,*") | |
6470d004 | 4307 | (set_attr "btver2_decode" "double,double,double") |
887423c0 | 4308 | (set_attr "length_vex" "*,*,4") |
4309 | (set_attr "prefix_rex" "1,1,*") | |
4c1099de | 4310 | (set_attr "prefix" "orig,orig,maybe_evex") |
2a466fea | 4311 | (set_attr "mode" "SF")]) |
4312 | ||
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of V4SF operand 1
;; to an SImode register.  Alternative 0 takes a "v" register source,
;; alternative 1 takes <round_constraint2>.
be60ab96 | 4313 | (define_insn "sse_cvtss2si<round_name>" |
2a466fea | 4314 | [(set (match_operand:SI 0 "register_operand" "=r,r") |
4315 | (unspec:SI | |
4316 | [(vec_select:SF | |
be60ab96 | 4317 | (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>") |
2a466fea | 4318 | (parallel [(const_int 0)]))] |
4319 | UNSPEC_FIX_NOTRUNC))] | |
4320 | "TARGET_SSE" | |
be60ab96 | 4321 | "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}" |
2a466fea | 4322 | [(set_attr "type" "sseicvt") |
4323 | (set_attr "athlon_decode" "double,vector") | |
97436e92 | 4324 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4325 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4326 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4327 | (set_attr "mode" "SI")]) |
4328 | ||
;; Variant of sse_cvtss2si whose source is a plain SFmode operand
;; (register "v" or memory "m") rather than element 0 of a V4SF vector.
4329 | (define_insn "sse_cvtss2si_2" | |
4330 | [(set (match_operand:SI 0 "register_operand" "=r,r") | |
e13e1b39 | 4331 | (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")] |
2a466fea | 4332 | UNSPEC_FIX_NOTRUNC))] |
4333 | "TARGET_SSE" | |
c358a059 | 4334 | "%vcvtss2si\t{%1, %0|%0, %k1}" |
2a466fea | 4335 | [(set_attr "type" "sseicvt") |
4336 | (set_attr "athlon_decode" "double,vector") | |
4337 | (set_attr "amdfam10_decode" "double,double") | |
97436e92 | 4338 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4339 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4340 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4341 | (set_attr "mode" "SI")]) |
4342 | ||
;; cvtss2si with a DImode destination: UNSPEC_FIX_NOTRUNC conversion of
;; element 0 of V4SF operand 1.  Enabled only with TARGET_64BIT; the
;; mnemonic in the template carries a {q} suffix.
be60ab96 | 4343 | (define_insn "sse_cvtss2siq<round_name>" |
2a466fea | 4344 | [(set (match_operand:DI 0 "register_operand" "=r,r") |
4345 | (unspec:DI | |
4346 | [(vec_select:SF | |
be60ab96 | 4347 | (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>") |
2a466fea | 4348 | (parallel [(const_int 0)]))] |
4349 | UNSPEC_FIX_NOTRUNC))] | |
4350 | "TARGET_SSE && TARGET_64BIT" | |
be60ab96 | 4351 | "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}" |
2a466fea | 4352 | [(set_attr "type" "sseicvt") |
4353 | (set_attr "athlon_decode" "double,vector") | |
97436e92 | 4354 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4355 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4356 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4357 | (set_attr "mode" "DI")]) |
4358 | ||
;; DImode-destination variant of sse_cvtss2si_2: the source is a plain
;; SFmode operand (register "v" or memory "m").  Requires TARGET_64BIT.
4359 | (define_insn "sse_cvtss2siq_2" | |
4360 | [(set (match_operand:DI 0 "register_operand" "=r,r") | |
4c1099de | 4361 | (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")] |
2a466fea | 4362 | UNSPEC_FIX_NOTRUNC))] |
4363 | "TARGET_SSE && TARGET_64BIT" | |
c358a059 | 4364 | "%vcvtss2si{q}\t{%1, %0|%0, %k1}" |
2a466fea | 4365 | [(set_attr "type" "sseicvt") |
4366 | (set_attr "athlon_decode" "double,vector") | |
4367 | (set_attr "amdfam10_decode" "double,double") | |
97436e92 | 4368 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4369 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4370 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4371 | (set_attr "mode" "DI")]) |
4372 | ||
;; cvttss2si: RTL fix conversion of element 0 of V4SF operand 1 to an
;; SImode register.  Uses the round_saeonly substitution attributes for
;; the predicate, second-alternative constraint and extra template operand.
dbfe84d5 | 4373 | (define_insn "sse_cvttss2si<round_saeonly_name>" |
2a466fea | 4374 | [(set (match_operand:SI 0 "register_operand" "=r,r") |
4375 | (fix:SI | |
4376 | (vec_select:SF | |
dbfe84d5 | 4377 | (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>") |
2a466fea | 4378 | (parallel [(const_int 0)]))))] |
4379 | "TARGET_SSE" | |
dbfe84d5 | 4380 | "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}" |
2a466fea | 4381 | [(set_attr "type" "sseicvt") |
4382 | (set_attr "athlon_decode" "double,vector") | |
4383 | (set_attr "amdfam10_decode" "double,double") | |
97436e92 | 4384 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4385 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4386 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4387 | (set_attr "mode" "SI")]) |
4388 | ||
;; cvttss2si with a DImode destination: RTL fix conversion of element 0 of
;; V4SF operand 1.  Requires TARGET_64BIT.
;; NOTE(review): alternative 1 uses <round_saeonly_constraint> here, while
;; the SImode pattern sse_cvttss2si uses <round_saeonly_constraint2> --
;; confirm whether the difference is intentional.
dbfe84d5 | 4389 | (define_insn "sse_cvttss2siq<round_saeonly_name>" |
2a466fea | 4390 | [(set (match_operand:DI 0 "register_operand" "=r,r") |
4391 | (fix:DI | |
4392 | (vec_select:SF | |
dbfe84d5 | 4393 | (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>") |
2a466fea | 4394 | (parallel [(const_int 0)]))))] |
4395 | "TARGET_SSE && TARGET_64BIT" | |
dbfe84d5 | 4396 | "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}" |
2a466fea | 4397 | [(set_attr "type" "sseicvt") |
4398 | (set_attr "athlon_decode" "double,vector") | |
4399 | (set_attr "amdfam10_decode" "double,double") | |
97436e92 | 4400 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4401 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4402 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4403 | (set_attr "mode" "DI")]) |
4404 | ||
;; vcvtusi2ss/sd from a 32-bit source: unsigned_float of SImode operand 2
;; is merged into VF_128 operand 1 under mask (const_int 1).  AVX512F only;
;; the rounding variant is further gated by <round_modev4sf_condition>.
be60ab96 | 4405 | (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>" |
697a43f8 | 4406 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
4407 | (vec_merge:VF_128 | |
4408 | (vec_duplicate:VF_128 | |
4409 | (unsigned_float:<ssescalarmode> | |
be60ab96 | 4410 | (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>"))) |
697a43f8 | 4411 | (match_operand:VF_128 1 "register_operand" "v") |
4412 | (const_int 1)))] | |
be60ab96 | 4413 | "TARGET_AVX512F && <round_modev4sf_condition>" |
88048095 | 4414 | "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" |
697a43f8 | 4415 | [(set_attr "type" "sseicvt") |
4416 | (set_attr "prefix" "evex") | |
4417 | (set_attr "mode" "<ssescalarmode>")]) | |
4418 | ||
;; vcvtusi2ss/sd from a 64-bit source: unsigned_float of DImode operand 2
;; is merged into VF_128 operand 1 under mask (const_int 1).  Requires
;; TARGET_AVX512F and TARGET_64BIT.
be60ab96 | 4419 | (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>" |
697a43f8 | 4420 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
4421 | (vec_merge:VF_128 | |
4422 | (vec_duplicate:VF_128 | |
4423 | (unsigned_float:<ssescalarmode> | |
be60ab96 | 4424 | (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>"))) |
697a43f8 | 4425 | (match_operand:VF_128 1 "register_operand" "v") |
4426 | (const_int 1)))] | |
4427 | "TARGET_AVX512F && TARGET_64BIT" | |
88048095 | 4428 | "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" |
697a43f8 | 4429 | [(set_attr "type" "sseicvt") |
4430 | (set_attr "prefix" "evex") | |
4431 | (set_attr "mode" "<ssescalarmode>")]) | |
4432 | ||
;; cvtdq2ps: signed conversion (RTL float) of the <sseintvecmode> integer
;; vector operand 1 to the VF1 float vector operand 0.  The mask and round
;; variants are gated by the two 512-bit-mode conditions.
be60ab96 | 4433 | (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>" |
e13e1b39 | 4434 | [(set (match_operand:VF1 0 "register_operand" "=v") |
d6b69370 | 4435 | (float:VF1 |
be60ab96 | 4436 | (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))] |
4437 | "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
4438 | "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
2a466fea | 4439 | [(set_attr "type" "ssecvt") |
887423c0 | 4440 | (set_attr "prefix" "maybe_vex") |
d6b69370 | 4441 | (set_attr "mode" "<sseinsnmode>")]) |
2a466fea | 4442 | |
;; vcvtudq2ps: unsigned conversion (RTL unsigned_float) of the
;; <sseintvecmode> integer vector operand 1 to a VF1_AVX512VL float vector.
;; EVEX-only (TARGET_AVX512F).
040236d9 | 4443 | (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>" |
4444 | [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v") | |
4445 | (unsigned_float:VF1_AVX512VL | |
4446 | (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))] | |
697a43f8 | 4447 | "TARGET_AVX512F" |
be60ab96 | 4448 | "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
697a43f8 | 4449 | [(set_attr "type" "ssecvt") |
4450 | (set_attr "prefix" "evex") | |
040236d9 | 4451 | (set_attr "mode" "<MODE>")]) |
697a43f8 | 4452 | |
;; Expander for unsigned integer vector -> float vector conversion.
;; V16SFmode emits gen_ufloatv16siv16sf2; otherwise, with TARGET_AVX512VL,
;; the V4SF or V8SF ufloat pattern is emitted; in all remaining cases the
;; work is delegated to ix86_expand_vector_convert_uns_vsivsf.
d6b69370 | 4453 | (define_expand "floatuns<sseintvecmodelower><mode>2" |
abd4f58b | 4454 | [(match_operand:VF1 0 "register_operand") |
4455 | (match_operand:<sseintvecmode> 1 "register_operand")] | |
e39ec8de | 4456 | "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)" |
e42ab82a | 4457 | { |
1706116d | 4458 | if (<MODE>mode == V16SFmode) |
4459 | emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1])); | |
f06d92e4 | 4460 | else |
4461 | if (TARGET_AVX512VL) | |
4462 | { | |
4463 | if (<MODE>mode == V4SFmode) | |
4464 | emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1])); | |
4465 | else | |
4466 | emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1])); | |
4467 | } | |
1706116d | 4468 | else |
4469 | ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]); | |
4470 | ||
e39ec8de | 4471 | DONE; |
e42ab82a | 4472 | }) |
4473 | ||
ed30e0a6 | 4474 | |
bf8e1ae3 | 4475 | ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern |
;; Maps each 32-bit-element integer vector mode to the lower-case name of
;; the SF vector mode with the same number of elements.
4476 | (define_mode_attr sf2simodelower | |
4477 | [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")]) | |
4478 | ||
;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of the <ssePSmode> float vector
;; operand 1 (register or memory) to a VI4_AVX integer vector.  The
;; prefix_data16 attribute is "*" when TARGET_AVX is set and "1" otherwise.
d135c232 | 4479 | (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>" |
bf8e1ae3 | 4480 | [(set (match_operand:VI4_AVX 0 "register_operand" "=v") |
4481 | (unspec:VI4_AVX | |
4482 | [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")] | |
4483 | UNSPEC_FIX_NOTRUNC))] | |
d135c232 | 4484 | "TARGET_SSE2 && <mask_mode512bit_condition>" |
4485 | "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
2a466fea | 4486 | [(set_attr "type" "ssecvt") |
887423c0 | 4487 | (set (attr "prefix_data16") |
4488 | (if_then_else | |
6be3efec | 4489 | (match_test "TARGET_AVX") |
887423c0 | 4490 | (const_string "*") |
4491 | (const_string "1"))) | |
4492 | (set_attr "prefix" "maybe_vex") | |
bf8e1ae3 | 4493 | (set_attr "mode" "<sseinsnmode>")]) |
2a466fea | 4494 | |
;; 512-bit vcvtps2dq: UNSPEC_FIX_NOTRUNC conversion of V16SF operand 1 to
;; V16SI, with the mask and round substitution variants.  AVX512F only.
be60ab96 | 4495 | (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>" |
697a43f8 | 4496 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
4497 | (unspec:V16SI | |
be60ab96 | 4498 | [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")] |
697a43f8 | 4499 | UNSPEC_FIX_NOTRUNC))] |
4500 | "TARGET_AVX512F" | |
be60ab96 | 4501 | "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
697a43f8 | 4502 | [(set_attr "type" "ssecvt") |
4503 | (set_attr "prefix" "evex") | |
4504 | (set_attr "mode" "XI")]) | |
4505 | ||
;; vcvtps2udq: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of the <ssePSmode>
;; float vector operand 1 to a VI4_AVX512VL integer vector.  AVX512F only.
7da26bee | 4506 | (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>" |
4507 | [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") | |
4508 | (unspec:VI4_AVX512VL | |
4509 | [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")] | |
697a43f8 | 4510 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] |
4511 | "TARGET_AVX512F" | |
be60ab96 | 4512 | "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
697a43f8 | 4513 | [(set_attr "type" "ssecvt") |
4514 | (set_attr "prefix" "evex") | |
7da26bee | 4515 | (set_attr "mode" "<sseinsnmode>")]) |
697a43f8 | 4516 | |
;; vcvtps2qq for the 256- and 512-bit destinations: UNSPEC_FIX_NOTRUNC
;; conversion of the <ssePSmode2> float vector operand 1 to VI8_256_512.
;; Requires TARGET_AVX512DQ; rounding is gated by
;; <round_mode512bit_condition>.
9bb6f354 | 4517 | (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>" |
4518 | [(set (match_operand:VI8_256_512 0 "register_operand" "=v") | |
4519 | (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")] | |
4520 | UNSPEC_FIX_NOTRUNC))] | |
4521 | "TARGET_AVX512DQ && <round_mode512bit_condition>" | |
4522 | "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4523 | [(set_attr "type" "ssecvt") | |
4524 | (set_attr "prefix" "evex") | |
4525 | (set_attr "mode" "<sseinsnmode>")]) | |
4526 | ||
;; 128-bit vcvtps2qq: UNSPEC_FIX_NOTRUNC conversion of elements 0 and 1 of
;; V4SF operand 1 to V2DI.  Requires both TARGET_AVX512DQ and
;; TARGET_AVX512VL; there is no rounding variant.
4527 | (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>" | |
4528 | [(set (match_operand:V2DI 0 "register_operand" "=v") | |
4529 | (unspec:V2DI | |
4530 | [(vec_select:V2SF | |
4531 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") | |
4532 | (parallel [(const_int 0) (const_int 1)]))] | |
4533 | UNSPEC_FIX_NOTRUNC))] | |
4534 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4535 | "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4536 | [(set_attr "type" "ssecvt") | |
4537 | (set_attr "prefix" "evex") | |
4538 | (set_attr "mode" "TI")]) | |
4539 | ||
;; vcvtps2uqq for the 256- and 512-bit destinations:
;; UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of the <ssePSmode2> float vector
;; operand 1 to VI8_256_512.  Requires TARGET_AVX512DQ.
4540 | (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>" | |
4541 | [(set (match_operand:VI8_256_512 0 "register_operand" "=v") | |
4542 | (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")] | |
4543 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4544 | "TARGET_AVX512DQ && <round_mode512bit_condition>" | |
4545 | "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4546 | [(set_attr "type" "ssecvt") | |
4547 | (set_attr "prefix" "evex") | |
4548 | (set_attr "mode" "<sseinsnmode>")]) | |
4549 | ||
;; 128-bit vcvtps2uqq: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of elements
;; 0 and 1 of V4SF operand 1 to V2DI.  Requires both TARGET_AVX512DQ and
;; TARGET_AVX512VL.
4550 | (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>" | |
4551 | [(set (match_operand:V2DI 0 "register_operand" "=v") | |
4552 | (unspec:V2DI | |
4553 | [(vec_select:V2SF | |
4554 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") | |
4555 | (parallel [(const_int 0) (const_int 1)]))] | |
4556 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4557 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4558 | "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4559 | [(set_attr "type" "ssecvt") | |
4560 | (set_attr "prefix" "evex") | |
4561 | (set_attr "mode" "TI")]) | |
4562 | ||
;; 512-bit vcvttps2dq / vcvttps2udq: the any_fix code iterator covers both
;; conversions of V16SF operand 1 to V16SI, with <fixsuffix> selecting the
;; name and mnemonic.  AVX512F only.
dbfe84d5 | 4563 | (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>" |
03ae25dc | 4564 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
4565 | (any_fix:V16SI | |
dbfe84d5 | 4566 | (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] |
03ae25dc | 4567 | "TARGET_AVX512F" |
dbfe84d5 | 4568 | "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
03ae25dc | 4569 | [(set_attr "type" "ssecvt") |
4570 | (set_attr "prefix" "evex") | |
4571 | (set_attr "mode" "XI")]) | |
4572 | ||
;; 256-bit vcvttps2dq: RTL fix conversion of V8SF operand 1 (register or
;; memory) to V8SI.  Requires TARGET_AVX; the masked variant is gated by
;; <mask_avx512vl_condition>.
d135c232 | 4573 | (define_insn "fix_truncv8sfv8si2<mask_name>" |
4574 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
4575 | (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))] | |
4576 | "TARGET_AVX && <mask_avx512vl_condition>" | |
4577 | "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ed30e0a6 | 4578 | [(set_attr "type" "ssecvt") |
d135c232 | 4579 | (set_attr "prefix" "<mask_prefix>") |
887423c0 | 4580 | (set_attr "mode" "OI")]) |
ed30e0a6 | 4581 | |
;; 128-bit cvttps2dq: RTL fix conversion of V4SF operand 1 (register or
;; memory) to V4SI.  prefix_rep is "*" with TARGET_AVX and "1" otherwise.
;; NOTE(review): prefix_data16 is assigned twice below -- once
;; conditionally on TARGET_AVX ("*" / "0") and once unconditionally as
;; "0".  One of the two looks redundant; confirm which assignment the
;; attribute generator honours before removing either.
d135c232 | 4582 | (define_insn "fix_truncv4sfv4si2<mask_name>" |
4583 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
4584 | (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))] | |
4585 | "TARGET_SSE2 && <mask_avx512vl_condition>" | |
4586 | "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
2a466fea | 4587 | [(set_attr "type" "ssecvt") |
887423c0 | 4588 | (set (attr "prefix_rep") |
4589 | (if_then_else | |
6be3efec | 4590 | (match_test "TARGET_AVX") |
887423c0 | 4591 | (const_string "*") |
4592 | (const_string "1"))) | |
4593 | (set (attr "prefix_data16") | |
4594 | (if_then_else | |
6be3efec | 4595 | (match_test "TARGET_AVX") |
887423c0 | 4596 | (const_string "*") |
4597 | (const_string "0"))) | |
00a0e418 | 4598 | (set_attr "prefix_data16" "0") |
d135c232 | 4599 | (set_attr "prefix" "<mask_prefix2>") |
2a466fea | 4600 | (set_attr "mode" "TI")]) |
4601 | ||
;; Expander for float vector -> unsigned integer vector truncation.
;; V16SFmode emits gen_ufix_truncv16sfv16si2 directly.  Otherwise
;; ix86_expand_adjust_ufix_to_sfix_si returns an adjusted input (tmp[0])
;; and fills in tmp[2]; the signed fix_trunc pattern converts tmp[0] into
;; a fresh register tmp[1], and operand 0 is set to tmp[1] XOR tmp[2].
f155d038 | 4602 | (define_expand "fixuns_trunc<mode><sseintvecmodelower>2" |
abd4f58b | 4603 | [(match_operand:<sseintvecmode> 0 "register_operand") |
4604 | (match_operand:VF1 1 "register_operand")] | |
da38df18 | 4605 | "TARGET_SSE2" |
f155d038 | 4606 | { |
1706116d | 4607 | if (<MODE>mode == V16SFmode) |
4608 | emit_insn (gen_ufix_truncv16sfv16si2 (operands[0], | |
4609 | operands[1])); | |
4610 | else | |
4611 | { | |
4612 | rtx tmp[3]; | |
4613 | tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); | |
4614 | tmp[1] = gen_reg_rtx (<sseintvecmode>mode); | |
4615 | emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0])); | |
4616 | emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2])); | |
4617 | } | |
c152f9e5 | 4618 | DONE; |
f155d038 | 4619 | }) |
4620 | ||
2a466fea | 4621 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
4622 | ;; | |
4623 | ;; Parallel double-precision floating point conversion operations | |
5802c0cb | 4624 | ;; |
4625 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
4626 | ||
;; cvtpi2pd: signed conversion (RTL float) of V2SI operand 1 to V2DF.
;; Alternative 0 takes the source with constraint "y" (unit "mmx",
;; prefix_data16 "1"); alternative 1 takes it from memory.
2a466fea | 4627 | (define_insn "sse2_cvtpi2pd" |
4628 | [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
4629 | (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] | |
5802c0cb | 4630 | "TARGET_SSE2" |
2a466fea | 4631 | "cvtpi2pd\t{%1, %0|%0, %1}" |
4632 | [(set_attr "type" "ssecvt") | |
4633 | (set_attr "unit" "mmx,*") | |
00a0e418 | 4634 | (set_attr "prefix_data16" "1,*") |
2a466fea | 4635 | (set_attr "mode" "V2DF")]) |
5802c0cb | 4636 | |
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of V2DF operand 1 (register or
;; memory) to V2SI operand 0 (constraint "y").
2a466fea | 4637 | (define_insn "sse2_cvtpd2pi" |
4638 | [(set (match_operand:V2SI 0 "register_operand" "=y") | |
4639 | (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] | |
4640 | UNSPEC_FIX_NOTRUNC))] | |
5802c0cb | 4641 | "TARGET_SSE2" |
2a466fea | 4642 | "cvtpd2pi\t{%1, %0|%0, %1}" |
4643 | [(set_attr "type" "ssecvt") | |
4644 | (set_attr "unit" "mmx") | |
887423c0 | 4645 | (set_attr "bdver1_decode" "double") |
6470d004 | 4646 | (set_attr "btver2_decode" "direct") |
2a466fea | 4647 | (set_attr "prefix_data16" "1") |
887423c0 | 4648 | (set_attr "mode" "DI")]) |
5802c0cb | 4649 | |
;; cvttpd2pi: RTL fix conversion of V2DF operand 1 (register or memory) to
;; V2SI operand 0 (constraint "y").
2a466fea | 4650 | (define_insn "sse2_cvttpd2pi" |
4651 | [(set (match_operand:V2SI 0 "register_operand" "=y") | |
4652 | (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] | |
5802c0cb | 4653 | "TARGET_SSE2" |
2a466fea | 4654 | "cvttpd2pi\t{%1, %0|%0, %1}" |
4655 | [(set_attr "type" "ssecvt") | |
4656 | (set_attr "unit" "mmx") | |
887423c0 | 4657 | (set_attr "bdver1_decode" "double") |
2a466fea | 4658 | (set_attr "prefix_data16" "1") |
887423c0 | 4659 | (set_attr "mode" "TI")]) |
ed30e0a6 | 4660 | |
;; cvtsi2sd: convert SImode operand 2 to DF and merge it into V2DF operand
;; 1 under mask (const_int 1).  Alternatives 0 and 1 (isa "noavx") are the
;; two-operand forms with operand 1 tied to the destination; alternative 2
;; (isa "avx") is the three-operand vcvtsi2sd form.  This pattern has no
;; rounding variant.
2a466fea | 4661 | (define_insn "sse2_cvtsi2sd" |
3cd66da2 | 4662 | [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") |
5802c0cb | 4663 | (vec_merge:V2DF |
2a466fea | 4664 | (vec_duplicate:V2DF |
887423c0 | 4665 | (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm"))) |
3cd66da2 | 4666 | (match_operand:V2DF 1 "register_operand" "0,0,v") |
5802c0cb | 4667 | (const_int 1)))] |
5802c0cb | 4668 | "TARGET_SSE2" |
887423c0 | 4669 | "@ |
4670 | cvtsi2sd\t{%2, %0|%0, %2} | |
4671 | cvtsi2sd\t{%2, %0|%0, %2} | |
4672 | vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}" | |
4673 | [(set_attr "isa" "noavx,noavx,avx") | |
4674 | (set_attr "type" "sseicvt") | |
4675 | (set_attr "athlon_decode" "double,direct,*") | |
4676 | (set_attr "amdfam10_decode" "vector,double,*") | |
4677 | (set_attr "bdver1_decode" "double,direct,*") | |
6470d004 | 4678 | (set_attr "btver2_decode" "double,double,double") |
4c9faaa4 | 4679 | (set_attr "znver1_decode" "double,double,double") |
3cd66da2 | 4680 | (set_attr "prefix" "orig,orig,maybe_evex") |
ed30e0a6 | 4681 | (set_attr "mode" "DF")]) |
4682 | ||
;; cvtsi2sdq: DImode-source variant of sse2_cvtsi2sd, enabled only with
;; TARGET_64BIT.  Converts operand 2 to DF and merges it into V2DF operand
;; 1 under mask (const_int 1).  The avx alternative carries <round_op3>.
be60ab96 | 4683 | (define_insn "sse2_cvtsi2sdq<round_name>" |
4c1099de | 4684 | [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") |
5802c0cb | 4685 | (vec_merge:V2DF |
2a466fea | 4686 | (vec_duplicate:V2DF |
be60ab96 | 4687 | (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>"))) |
4c1099de | 4688 | (match_operand:V2DF 1 "register_operand" "0,0,v") |
5802c0cb | 4689 | (const_int 1)))] |
2a466fea | 4690 | "TARGET_SSE2 && TARGET_64BIT" |
887423c0 | 4691 | "@ |
4692 | cvtsi2sdq\t{%2, %0|%0, %2} | |
4693 | cvtsi2sdq\t{%2, %0|%0, %2} | |
88048095 | 4694 | vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" |
887423c0 | 4695 | [(set_attr "isa" "noavx,noavx,avx") |
4696 | (set_attr "type" "sseicvt") | |
4697 | (set_attr "athlon_decode" "double,direct,*") | |
4698 | (set_attr "amdfam10_decode" "vector,double,*") | |
4699 | (set_attr "bdver1_decode" "double,direct,*") | |
4700 | (set_attr "length_vex" "*,*,4") | |
4701 | (set_attr "prefix_rex" "1,1,*") | |
4c1099de | 4702 | (set_attr "prefix" "orig,orig,maybe_evex") |
887423c0 | 4703 | (set_attr "mode" "DF")]) |
2a466fea | 4704 | |
;; vcvtss2usi (32-bit result): UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of
;; element 0 of V4SF operand 1 to an SImode register.  AVX512F only.
be60ab96 | 4705 | (define_insn "avx512f_vcvtss2usi<round_name>" |
697a43f8 | 4706 | [(set (match_operand:SI 0 "register_operand" "=r") |
4707 | (unspec:SI | |
4708 | [(vec_select:SF | |
be60ab96 | 4709 | (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>") |
697a43f8 | 4710 | (parallel [(const_int 0)]))] |
4711 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4712 | "TARGET_AVX512F" | |
be60ab96 | 4713 | "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" |
697a43f8 | 4714 | [(set_attr "type" "sseicvt") |
4715 | (set_attr "prefix" "evex") | |
4716 | (set_attr "mode" "SI")]) | |
4717 | ||
;; vcvtss2usi (64-bit result): UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of
;; element 0 of V4SF operand 1 to a DImode register.  Requires
;; TARGET_AVX512F and TARGET_64BIT; the template uses the same mnemonic as
;; the SImode pattern, without a size suffix.
be60ab96 | 4718 | (define_insn "avx512f_vcvtss2usiq<round_name>" |
697a43f8 | 4719 | [(set (match_operand:DI 0 "register_operand" "=r") |
4720 | (unspec:DI | |
4721 | [(vec_select:SF | |
be60ab96 | 4722 | (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>") |
697a43f8 | 4723 | (parallel [(const_int 0)]))] |
4724 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4725 | "TARGET_AVX512F && TARGET_64BIT" | |
be60ab96 | 4726 | "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" |
697a43f8 | 4727 | [(set_attr "type" "sseicvt") |
4728 | (set_attr "prefix" "evex") | |
4729 | (set_attr "mode" "DI")]) | |
4730 | ||
;; vcvttss2usi (32-bit result): RTL unsigned_fix conversion of element 0
;; of V4SF operand 1 to an SImode register.  AVX512F only; uses the
;; round_saeonly substitution attributes.
dbfe84d5 | 4731 | (define_insn "avx512f_vcvttss2usi<round_saeonly_name>" |
697a43f8 | 4732 | [(set (match_operand:SI 0 "register_operand" "=r") |
4733 | (unsigned_fix:SI | |
4734 | (vec_select:SF | |
dbfe84d5 | 4735 | (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
697a43f8 | 4736 | (parallel [(const_int 0)]))))] |
4737 | "TARGET_AVX512F" | |
dbfe84d5 | 4738 | "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" |
697a43f8 | 4739 | [(set_attr "type" "sseicvt") |
4740 | (set_attr "prefix" "evex") | |
4741 | (set_attr "mode" "SI")]) | |
4742 | ||
;; vcvttss2usi (64-bit result): RTL unsigned_fix conversion of element 0
;; of V4SF operand 1 to a DImode register.  Requires TARGET_AVX512F and
;; TARGET_64BIT.
dbfe84d5 | 4743 | (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>" |
697a43f8 | 4744 | [(set (match_operand:DI 0 "register_operand" "=r") |
4745 | (unsigned_fix:DI | |
4746 | (vec_select:SF | |
dbfe84d5 | 4747 | (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
697a43f8 | 4748 | (parallel [(const_int 0)]))))] |
4749 | "TARGET_AVX512F && TARGET_64BIT" | |
dbfe84d5 | 4750 | "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" |
697a43f8 | 4751 | [(set_attr "type" "sseicvt") |
4752 | (set_attr "prefix" "evex") | |
4753 | (set_attr "mode" "DI")]) | |
4754 | ||
;; vcvtsd2usi (32-bit result): UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of
;; element 0 of V2DF operand 1 to an SImode register.  AVX512F only.
be60ab96 | 4755 | (define_insn "avx512f_vcvtsd2usi<round_name>" |
697a43f8 | 4756 | [(set (match_operand:SI 0 "register_operand" "=r") |
4757 | (unspec:SI | |
4758 | [(vec_select:DF | |
be60ab96 | 4759 | (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>") |
697a43f8 | 4760 | (parallel [(const_int 0)]))] |
4761 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4762 | "TARGET_AVX512F" | |
be60ab96 | 4763 | "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" |
697a43f8 | 4764 | [(set_attr "type" "sseicvt") |
4765 | (set_attr "prefix" "evex") | |
4766 | (set_attr "mode" "SI")]) | |
4767 | ||
;; vcvtsd2usi (64-bit result): UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of
;; element 0 of V2DF operand 1 to a DImode register.  Requires
;; TARGET_AVX512F and TARGET_64BIT.
be60ab96 | 4768 | (define_insn "avx512f_vcvtsd2usiq<round_name>" |
697a43f8 | 4769 | [(set (match_operand:DI 0 "register_operand" "=r") |
4770 | (unspec:DI | |
4771 | [(vec_select:DF | |
be60ab96 | 4772 | (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>") |
697a43f8 | 4773 | (parallel [(const_int 0)]))] |
4774 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4775 | "TARGET_AVX512F && TARGET_64BIT" | |
be60ab96 | 4776 | "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" |
697a43f8 | 4777 | [(set_attr "type" "sseicvt") |
4778 | (set_attr "prefix" "evex") | |
4779 | (set_attr "mode" "DI")]) | |
4780 | ||
;; vcvttsd2usi (32-bit result): RTL unsigned_fix conversion of element 0
;; of V2DF operand 1 to an SImode register.  AVX512F only; uses the
;; round_saeonly substitution attributes.
dbfe84d5 | 4781 | (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>" |
697a43f8 | 4782 | [(set (match_operand:SI 0 "register_operand" "=r") |
4783 | (unsigned_fix:SI | |
4784 | (vec_select:DF | |
dbfe84d5 | 4785 | (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
697a43f8 | 4786 | (parallel [(const_int 0)]))))] |
4787 | "TARGET_AVX512F" | |
dbfe84d5 | 4788 | "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" |
697a43f8 | 4789 | [(set_attr "type" "sseicvt") |
4790 | (set_attr "prefix" "evex") | |
4791 | (set_attr "mode" "SI")]) | |
4792 | ||
;; vcvttsd2usi (64-bit result): RTL unsigned_fix conversion of element 0
;; of V2DF operand 1 to a DImode register.  Requires TARGET_AVX512F and
;; TARGET_64BIT.
dbfe84d5 | 4793 | (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>" |
697a43f8 | 4794 | [(set (match_operand:DI 0 "register_operand" "=r") |
4795 | (unsigned_fix:DI | |
4796 | (vec_select:DF | |
dbfe84d5 | 4797 | (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
697a43f8 | 4798 | (parallel [(const_int 0)]))))] |
4799 | "TARGET_AVX512F && TARGET_64BIT" | |
dbfe84d5 | 4800 | "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" |
697a43f8 | 4801 | [(set_attr "type" "sseicvt") |
4802 | (set_attr "prefix" "evex") | |
4803 | (set_attr "mode" "DI")]) | |
4804 | ||
;; Emits [v]cvtsd2si: UNSPEC_FIX_NOTRUNC of element 0 of a V2DF operand into
;; an SImode register.  Two alternatives; the Intel-syntax template prints
;; operand 1 with the %q modifier.  Enabled for TARGET_SSE2.
be60ab96 | 4805 | (define_insn "sse2_cvtsd2si<round_name>" |
2a466fea | 4806 | [(set (match_operand:SI 0 "register_operand" "=r,r") |
4807 | (unspec:SI | |
4808 | [(vec_select:DF | |
be60ab96 | 4809 | (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>") |
2a466fea | 4810 | (parallel [(const_int 0)]))] |
4811 | UNSPEC_FIX_NOTRUNC))] | |
5802c0cb | 4812 | "TARGET_SSE2" |
be60ab96 | 4813 | "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}" |
2a466fea | 4814 | [(set_attr "type" "sseicvt") |
4815 | (set_attr "athlon_decode" "double,vector") | |
97436e92 | 4816 | (set_attr "bdver1_decode" "double,double") |
6470d004 | 4817 | (set_attr "btver2_decode" "double,double") |
2a466fea | 4818 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4819 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4820 | (set_attr "mode" "SI")]) |
5802c0cb | 4821 | |
;; Variant of the cvtsd2si pattern whose source is a scalar DFmode operand
;; (register "v" or memory "m") rather than element 0 of a V2DF vector.
2a466fea | 4822 | (define_insn "sse2_cvtsd2si_2" |
4823 | [(set (match_operand:SI 0 "register_operand" "=r,r") | |
e13e1b39 | 4824 | (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")] |
2a466fea | 4825 | UNSPEC_FIX_NOTRUNC))] |
5802c0cb | 4826 | "TARGET_SSE2" |
c358a059 | 4827 | "%vcvtsd2si\t{%1, %0|%0, %q1}" |
2a466fea | 4828 | [(set_attr "type" "sseicvt") |
4829 | (set_attr "athlon_decode" "double,vector") | |
4830 | (set_attr "amdfam10_decode" "double,double") | |
97436e92 | 4831 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4832 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4833 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4834 | (set_attr "mode" "SI")]) |
5802c0cb | 4835 | |
;; DImode counterpart of sse2_cvtsd2si: emits [v]cvtsd2si{q} from element 0
;; of a V2DF operand.  Enabled for TARGET_SSE2 && TARGET_64BIT.
be60ab96 | 4836 | (define_insn "sse2_cvtsd2siq<round_name>" |
2a466fea | 4837 | [(set (match_operand:DI 0 "register_operand" "=r,r") |
4838 | (unspec:DI | |
4839 | [(vec_select:DF | |
be60ab96 | 4840 | (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>") |
2a466fea | 4841 | (parallel [(const_int 0)]))] |
4842 | UNSPEC_FIX_NOTRUNC))] | |
4843 | "TARGET_SSE2 && TARGET_64BIT" | |
be60ab96 | 4844 | "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}" |
2a466fea | 4845 | [(set_attr "type" "sseicvt") |
4846 | (set_attr "athlon_decode" "double,vector") | |
97436e92 | 4847 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4848 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4849 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4850 | (set_attr "mode" "DI")]) |
5802c0cb | 4851 | |
;; DImode cvtsd2si{q} variant taking a scalar DFmode source (register or
;; memory).  Enabled for TARGET_SSE2 && TARGET_64BIT.
2a466fea | 4852 | (define_insn "sse2_cvtsd2siq_2" |
4853 | [(set (match_operand:DI 0 "register_operand" "=r,r") | |
e13e1b39 | 4854 | (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")] |
2a466fea | 4855 | UNSPEC_FIX_NOTRUNC))] |
4856 | "TARGET_SSE2 && TARGET_64BIT" | |
c358a059 | 4857 | "%vcvtsd2si{q}\t{%1, %0|%0, %q1}" |
2a466fea | 4858 | [(set_attr "type" "sseicvt") |
4859 | (set_attr "athlon_decode" "double,vector") | |
4860 | (set_attr "amdfam10_decode" "double,double") | |
97436e92 | 4861 | (set_attr "bdver1_decode" "double,double") |
2a466fea | 4862 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 4863 | (set_attr "prefix" "maybe_vex") |
2a466fea | 4864 | (set_attr "mode" "DI")]) |
5802c0cb | 4865 | |
;; Emits [v]cvttsd2si: (fix:SI ...) of element 0 of a V2DF operand into an
;; SImode register.  Enabled for TARGET_SSE2.
dbfe84d5 | 4866 | (define_insn "sse2_cvttsd2si<round_saeonly_name>" |
2a466fea | 4867 | [(set (match_operand:SI 0 "register_operand" "=r,r") |
4868 | (fix:SI | |
4869 | (vec_select:DF | |
dbfe84d5 | 4870 | (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>") |
2a466fea | 4871 | (parallel [(const_int 0)]))))] |
5802c0cb | 4872 | "TARGET_SSE2" |
dbfe84d5 | 4873 | "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}" |
2a466fea | 4874 | [(set_attr "type" "sseicvt") |
2a466fea | 4875 | (set_attr "athlon_decode" "double,vector") |
97436e92 | 4876 | (set_attr "amdfam10_decode" "double,double") |
887423c0 | 4877 | (set_attr "bdver1_decode" "double,double") |
6470d004 | 4878 | (set_attr "btver2_decode" "double,double") |
887423c0 | 4879 | (set_attr "prefix_rep" "1") |
4880 | (set_attr "prefix" "maybe_vex") | |
4881 | (set_attr "mode" "SI")]) | |
2a466fea | 4882 | |
;; DImode counterpart of sse2_cvttsd2si: emits [v]cvttsd2si{q}.
;; Enabled for TARGET_SSE2 && TARGET_64BIT.
dbfe84d5 | 4883 | (define_insn "sse2_cvttsd2siq<round_saeonly_name>" |
2a466fea | 4884 | [(set (match_operand:DI 0 "register_operand" "=r,r") |
4885 | (fix:DI | |
4886 | (vec_select:DF | |
dbfe84d5 | 4887 | (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>") |
2a466fea | 4888 | (parallel [(const_int 0)]))))] |
4889 | "TARGET_SSE2 && TARGET_64BIT" | |
dbfe84d5 | 4890 | "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}" |
2a466fea | 4891 | [(set_attr "type" "sseicvt") |
2a466fea | 4892 | (set_attr "athlon_decode" "double,vector") |
97436e92 | 4893 | (set_attr "amdfam10_decode" "double,double") |
887423c0 | 4894 | (set_attr "bdver1_decode" "double,double") |
4895 | (set_attr "prefix_rep" "1") | |
4896 | (set_attr "prefix" "maybe_vex") | |
4897 | (set_attr "mode" "DI")]) | |
5802c0cb | 4898 | |
;; Map a double-precision vector mode (V8DF, V4DF) to the SImode vector mode
;; with the same element count, in upper-case and lower-case spellings.
6615b722 | 4899 | ;; For float<si2dfmode><mode>2 insn pattern |
4900 | (define_mode_attr si2dfmode | |
4901 | [(V8DF "V8SI") (V4DF "V4SI")]) | |
4902 | (define_mode_attr si2dfmodelower | |
4903 | [(V8DF "v8si") (V4DF "v4si")]) | |
4904 | ||
;; Emits vcvtdq2pd: (float ...) of a <si2dfmode> integer vector into a
;; VF2_512_256 destination.  Enabled for
;; TARGET_AVX && <mask_mode512bit_condition>.
5220cab6 | 4905 | (define_insn "float<si2dfmodelower><mode>2<mask_name>" |
6615b722 | 4906 | [(set (match_operand:VF2_512_256 0 "register_operand" "=v") |
4907 | (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))] | |
5220cab6 | 4908 | "TARGET_AVX && <mask_mode512bit_condition>" |
4909 | "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ed30e0a6 | 4910 | [(set_attr "type" "ssecvt") |
6615b722 | 4911 | (set_attr "prefix" "maybe_vex") |
4912 | (set_attr "mode" "<MODE>")]) | |
ed30e0a6 | 4913 | |
;; Emits vcvt<floatsuffix>qq2pd: (any_float ...) of a <sseintvecmode>
;; integer vector into a VF2_AVX512VL destination.  Enabled for
;; TARGET_AVX512DQ.
;; Operand 1 uses <round_constraint> instead of a fixed "vm", matching the
;; sibling <round_name> conversion patterns in this file.  A fixed "vm"
;; would let the variant that prints <round_mask_op2> choose a memory
;; source.
;; NOTE(review): <round_constraint> is defined outside this chunk;
;; presumably it is "vm" without rounding and "v" with it -- confirm in
;; subst.md.
d3d65e42 | 4914 | (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>" |
4915 | [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v") | |
4916 | (any_float:VF2_AVX512VL | |
4917 | (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))] | |
4918 | "TARGET_AVX512DQ" | |
4919 | "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4920 | [(set_attr "type" "ssecvt") | |
4921 | (set_attr "prefix" "evex") | |
4922 | (set_attr "mode" "<MODE>")]) | |
4923 | ||
4924 | ;; For <floatsuffix>float<sselongvecmodelower><mode> insn patterns | |
;; Assembler suffix appended after vcvt<floatsuffix>qq2ps, per SF vector mode.
4925 | (define_mode_attr qq2pssuff | |
4926 | [(V8SF "") (V4SF "{y}")]) | |
4927 | ||
;; DImode vector mode with the same element count as the SF vector mode.
4928 | (define_mode_attr sselongvecmode | |
4929 | [(V8SF "V8DI") (V4SF "V4DI")]) | |
4930 | ||
;; Lower-case spelling of sselongvecmode, used in pattern names.
4931 | (define_mode_attr sselongvecmodelower | |
4932 | [(V8SF "v8di") (V4SF "v4di")]) | |
4933 | ||
;; Integer mode name per float vector mode; referenced as a "mode"
;; attribute value by a fix_trunc pattern later in this file.
4934 | (define_mode_attr sseintvecmode3 | |
4935 | [(V8SF "XI") (V4SF "OI") | |
4936 | (V8DF "OI") (V4DF "TI")]) | |
4937 | ||
;; Emits vcvt<floatsuffix>qq2ps<qq2pssuff>: (any_float ...) of a
;; <sselongvecmode> integer vector into a VF1_128_256VL destination.
;; Enabled for TARGET_AVX512DQ && <round_modev8sf_condition>.
4938 | (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>" | |
4939 | [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v") | |
4940 | (any_float:VF1_128_256VL | |
4941 | (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))] | |
4942 | "TARGET_AVX512DQ && <round_modev8sf_condition>" | |
4943 | "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4944 | [(set_attr "type" "ssecvt") | |
4945 | (set_attr "prefix" "evex") | |
4946 | (set_attr "mode" "<MODE>")]) | |
4947 | ||
;; Unmasked V2DI -> V2SF conversion emitted as vcvt<floatsuffix>qq2ps{x}.
;; The V4SF result is the converted V2SF concatenated with a zero V2SF
;; constant.  Enabled for TARGET_AVX512DQ && TARGET_AVX512VL.
4948 | (define_insn "*<floatsuffix>floatv2div2sf2" | |
4949 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
4950 | (vec_concat:V4SF | |
4951 | (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm")) | |
4952 | (const_vector:V2SF [(const_int 0) (const_int 0)])))] | |
4953 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4954 | "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}" | |
4955 | [(set_attr "type" "ssecvt") | |
4956 | (set_attr "prefix" "evex") | |
4957 | (set_attr "mode" "V4SF")]) | |
4958 | ||
;; Masked form of the V2DI -> V2SF conversion.  The converted V2SF is
;; vec_merged with elements 0 and 1 of operand 2 under the QImode mask in
;; operand 3, then concatenated with a zero V2SF constant.  The template
;; prints the mask as %{%3%} followed by %N2.
4959 | (define_insn "<floatsuffix>floatv2div2sf2_mask" | |
4960 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
4961 | (vec_concat:V4SF | |
4962 | (vec_merge:V2SF | |
4963 | (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm")) | |
4964 | (vec_select:V2SF | |
4965 | (match_operand:V4SF 2 "vector_move_operand" "0C") | |
4966 | (parallel [(const_int 0) (const_int 1)])) | |
4967 | (match_operand:QI 3 "register_operand" "Yk")) | |
4968 | (const_vector:V2SF [(const_int 0) (const_int 0)])))] | |
4969 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4970 | "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
4971 | [(set_attr "type" "ssecvt") | |
4972 | (set_attr "prefix" "evex") | |
4973 | (set_attr "mode" "V4SF")]) | |
4974 | ||
;; Emits vcvtudq2pd: (unsigned_float ...) of a <si2dfmode> integer vector
;; into a VF2_512_256VL destination.  Enabled for TARGET_AVX512F.
4975 | (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>" | |
4976 | [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v") | |
4977 | (unsigned_float:VF2_512_256VL | |
4978 | (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))] | |
4979 | "TARGET_AVX512F" | |
4980 | "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4981 | [(set_attr "type" "ssecvt") | |
4982 | (set_attr "prefix" "evex") | |
4983 | (set_attr "mode" "<MODE>")]) | |
4984 | ||
;; Emits vcvtudq2pd for the 128-bit case: unsigned_float of elements 0 and 1
;; of a V4SI operand into V2DF.  Enabled for TARGET_AVX512VL.
4985 | (define_insn "ufloatv2siv2df2<mask_name>" | |
4986 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
4987 | (unsigned_float:V2DF | |
4988 | (vec_select:V2SI | |
4989 | (match_operand:V4SI 1 "nonimmediate_operand" "vm") | |
4990 | (parallel [(const_int 0) (const_int 1)]))))] | |
4991 | "TARGET_AVX512VL" | |
5220cab6 | 4992 | "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
697a43f8 | 4993 | [(set_attr "type" "ssecvt") |
4994 | (set_attr "prefix" "evex") | |
d3d65e42 | 4995 | (set_attr "mode" "V2DF")]) |
697a43f8 | 4996 | |
;; Emits vcvtdq2pd converting elements 0..7 of a V16SI operand to V8DF.
;; Operand 1 is printed with the %t modifier.  Enabled for TARGET_AVX512F.
4997 | (define_insn "avx512f_cvtdq2pd512_2" | |
4998 | [(set (match_operand:V8DF 0 "register_operand" "=v") | |
4999 | (float:V8DF | |
5000 | (vec_select:V8SI | |
5001 | (match_operand:V16SI 1 "nonimmediate_operand" "vm") | |
5002 | (parallel [(const_int 0) (const_int 1) | |
5003 | (const_int 2) (const_int 3) | |
5004 | (const_int 4) (const_int 5) | |
5005 | (const_int 6) (const_int 7)]))))] | |
0c4232b3 | 5006 | "TARGET_AVX512F" |
697a43f8 | 5007 | "vcvtdq2pd\t{%t1, %0|%0, %t1}" |
5008 | [(set_attr "type" "ssecvt") | |
5009 | (set_attr "prefix" "evex") | |
5010 | (set_attr "mode" "V8DF")]) | |
5011 | ||
;; Emits vcvtdq2pd converting elements 0..3 of a V8SI operand to V4DF.
;; Operand 1 is printed with the %x modifier.  Enabled for TARGET_AVX.
ded0808e | 5012 | (define_insn "avx_cvtdq2pd256_2" |
0c4232b3 | 5013 | [(set (match_operand:V4DF 0 "register_operand" "=v") |
8cedf886 | 5014 | (float:V4DF |
5015 | (vec_select:V4SI | |
0c4232b3 | 5016 | (match_operand:V8SI 1 "nonimmediate_operand" "vm") |
887423c0 | 5017 | (parallel [(const_int 0) (const_int 1) |
5018 | (const_int 2) (const_int 3)]))))] | |
8cedf886 | 5019 | "TARGET_AVX" |
5020 | "vcvtdq2pd\t{%x1, %0|%0, %x1}" | |
5021 | [(set_attr "type" "ssecvt") | |
0c4232b3 | 5022 | (set_attr "prefix" "maybe_evex") |
8cedf886 | 5023 | (set_attr "mode" "V4DF")]) |
5024 | ||
;; Emits [v]cvtdq2pd converting elements 0 and 1 of a V4SI operand to V2DF.
;; The Intel-syntax template prints operand 1 with %q.  Enabled for
;; TARGET_SSE2 && <mask_avx512vl_condition>.
0c4232b3 | 5025 | (define_insn "sse2_cvtdq2pd<mask_name>" |
5026 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
2a466fea | 5027 | (float:V2DF |
5028 | (vec_select:V2SI | |
0c4232b3 | 5029 | (match_operand:V4SI 1 "nonimmediate_operand" "vm") |
2a466fea | 5030 | (parallel [(const_int 0) (const_int 1)]))))] |
0c4232b3 | 5031 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
5032 | "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
2a466fea | 5033 | [(set_attr "type" "ssecvt") |
ed30e0a6 | 5034 | (set_attr "prefix" "maybe_vex") |
8c1dfa94 | 5035 | (set_attr "ssememalign" "64") |
5802c0cb | 5036 | (set_attr "mode" "V2DF")]) |
5037 | ||
;; Emits vcvtpd2dq: UNSPEC_FIX_NOTRUNC of a V8DF operand into V8SI.
;; Enabled for TARGET_AVX512F.
be60ab96 | 5038 | (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>" |
697a43f8 | 5039 | [(set (match_operand:V8SI 0 "register_operand" "=v") |
5220cab6 | 5040 | (unspec:V8SI |
be60ab96 | 5041 | [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")] |
5220cab6 | 5042 | UNSPEC_FIX_NOTRUNC))] |
697a43f8 | 5043 | "TARGET_AVX512F" |
be60ab96 | 5044 | "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
697a43f8 | 5045 | [(set_attr "type" "ssecvt") |
5046 | (set_attr "prefix" "evex") | |
5047 | (set_attr "mode" "OI")]) | |
5048 | ||
;; Emits vcvtpd2dq{y}: UNSPEC_FIX_NOTRUNC of a V4DF operand into V4SI.
;; Enabled for TARGET_AVX && <mask_avx512vl_condition>.
0c4232b3 | 5049 | (define_insn "avx_cvtpd2dq256<mask_name>" |
5050 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
5051 | (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")] | |
ed30e0a6 | 5052 | UNSPEC_FIX_NOTRUNC))] |
0c4232b3 | 5053 | "TARGET_AVX && <mask_avx512vl_condition>" |
5054 | "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ed30e0a6 | 5055 | [(set_attr "type" "ssecvt") |
0c4232b3 | 5056 | (set_attr "prefix" "<mask_prefix>") |
ed30e0a6 | 5057 | (set_attr "mode" "OI")]) |
5058 | ||
;; Expander producing a V8SI whose low half is the UNSPEC_FIX_NOTRUNC
;; conversion of a V4DF operand.  The preparation statement sets operands[2]
;; (the upper half) to the zero V4SI constant.
83c4576f | 5059 | (define_expand "avx_cvtpd2dq256_2" |
abd4f58b | 5060 | [(set (match_operand:V8SI 0 "register_operand") |
83c4576f | 5061 | (vec_concat:V8SI |
abd4f58b | 5062 | (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")] |
83c4576f | 5063 | UNSPEC_FIX_NOTRUNC) |
5064 | (match_dup 2)))] | |
5065 | "TARGET_AVX" | |
5066 | "operands[2] = CONST0_RTX (V4SImode);") | |
5067 | ||
;; Insn whose RTL has the same vec_concat shape as the avx_cvtpd2dq256_2
;; expander: operand 2 must satisfy const0_operand.  Emits vcvtpd2dq{y}
;; with the destination printed via %x0.
5068 | (define_insn "*avx_cvtpd2dq256_2" | |
5069 | [(set (match_operand:V8SI 0 "register_operand" "=x") | |
5070 | (vec_concat:V8SI | |
5071 | (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")] | |
5072 | UNSPEC_FIX_NOTRUNC) | |
abd4f58b | 5073 | (match_operand:V4SI 2 "const0_operand")))] |
83c4576f | 5074 | "TARGET_AVX" |
5075 | "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}" | |
5076 | [(set_attr "type" "ssecvt") | |
5077 | (set_attr "prefix" "vex") | |
6470d004 | 5078 | (set_attr "btver2_decode" "vector") |
83c4576f | 5079 | (set_attr "mode" "OI")]) |
5080 | ||
;; V2DF -> V2SI UNSPEC_FIX_NOTRUNC conversion; the V4SI result is the V2SI
;; value concatenated with a zero V2SI constant.  The C output block picks
;; vcvtpd2dq{x} (with mask operand text) when TARGET_AVX, else cvtpd2dq.
0c4232b3 | 5081 | (define_insn "sse2_cvtpd2dq<mask_name>" |
5082 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
2a466fea | 5083 | (vec_concat:V4SI |
0c4232b3 | 5084 | (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] |
2a466fea | 5085 | UNSPEC_FIX_NOTRUNC) |
0c4232b3 | 5086 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] |
5087 | "TARGET_SSE2 && <mask_avx512vl_condition>" | |
887423c0 | 5088 | { |
5089 | if (TARGET_AVX) | |
0c4232b3 | 5090 | return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; |
887423c0 | 5091 | else |
5092 | return "cvtpd2dq\t{%1, %0|%0, %1}"; | |
5093 | } | |
2a466fea | 5094 | [(set_attr "type" "ssecvt") |
5095 | (set_attr "prefix_rep" "1") | |
00a0e418 | 5096 | (set_attr "prefix_data16" "0") |
ed30e0a6 | 5097 | (set_attr "prefix" "maybe_vex") |
2a466fea | 5098 | (set_attr "mode" "TI") |
97436e92 | 5099 | (set_attr "amdfam10_decode" "double") |
68ff067d | 5100 | (set_attr "athlon_decode" "vector") |
97436e92 | 5101 | (set_attr "bdver1_decode" "double")]) |
5802c0cb | 5102 | |
;; Assembler suffix appended after vcvtpd2udq, per DF vector mode.
0c4232b3 | 5103 | ;; For ufix_notrunc* insn patterns |
5104 | (define_mode_attr pd2udqsuff | |
5105 | [(V8DF "") (V4DF "{y}")]) | |
5106 | ||
;; Emits vcvtpd2udq<pd2udqsuff>: UNSPEC_UNSIGNED_FIX_NOTRUNC of a
;; VF2_512_256VL operand into the matching <si2dfmode> integer vector.
;; Enabled for TARGET_AVX512F.
5107 | (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>" | |
5108 | [(set (match_operand:<si2dfmode> 0 "register_operand" "=v") | |
5109 | (unspec:<si2dfmode> | |
5110 | [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")] | |
697a43f8 | 5111 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] |
5112 | "TARGET_AVX512F" | |
0c4232b3 | 5113 | "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
697a43f8 | 5114 | [(set_attr "type" "ssecvt") |
5115 | (set_attr "prefix" "evex") | |
0c4232b3 | 5116 | (set_attr "mode" "<sseinsnmode>")]) |
5117 | ||
;; Emits vcvtpd2udq{x}: UNSPEC_UNSIGNED_FIX_NOTRUNC of a V2DF operand into
;; V2SI, concatenated with a zero V2SI constant to form the V4SI result.
;; Enabled for TARGET_AVX512VL.
5118 | (define_insn "ufix_notruncv2dfv2si2<mask_name>" | |
5119 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
5120 | (vec_concat:V4SI | |
5121 | (unspec:V2SI | |
5122 | [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] | |
5123 | UNSPEC_UNSIGNED_FIX_NOTRUNC) | |
5124 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] | |
5125 | "TARGET_AVX512VL" | |
5126 | "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
5127 | [(set_attr "type" "ssecvt") | |
5128 | (set_attr "prefix" "evex") | |
5129 | (set_attr "mode" "TI")]) | |
697a43f8 | 5130 | |
;; Emits vcvttpd2<fixsuffix>dq: (any_fix ...) of a V8DF operand into V8SI.
;; Enabled for TARGET_AVX512F.
dbfe84d5 | 5131 | (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>" |
6615b722 | 5132 | [(set (match_operand:V8SI 0 "register_operand" "=v") |
5220cab6 | 5133 | (any_fix:V8SI |
dbfe84d5 | 5134 | (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] |
6615b722 | 5135 | "TARGET_AVX512F" |
dbfe84d5 | 5136 | "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
6615b722 | 5137 | [(set_attr "type" "ssecvt") |
5138 | (set_attr "prefix" "evex") | |
5139 | (set_attr "mode" "OI")]) | |
5140 | ||
;; Emits vcvttpd2udq{x}: unsigned_fix of a V2DF operand into V2SI,
;; concatenated with a zero V2SI constant to form the V4SI result.
;; Enabled for TARGET_AVX512VL.
05ecc201 | 5141 | (define_insn "ufix_truncv2dfv2si2<mask_name>" |
5142 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
5143 | (vec_concat:V4SI | |
5144 | (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) | |
5145 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] | |
5146 | "TARGET_AVX512VL" | |
5147 | "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ed30e0a6 | 5148 | [(set_attr "type" "ssecvt") |
05ecc201 | 5149 | (set_attr "prefix" "evex") |
5150 | (set_attr "mode" "TI")]) | |
5151 | ||
;; Emits vcvttpd2dq{y}: (fix:V4SI ...) of a V4DF operand.
;; The condition is written as TARGET_AVX && <mask_avx512vl_condition>,
;; the same form used by the other 256-bit <mask_name> patterns in this
;; file (for example avx_cvtpd2dq256<mask_name>).  The previous
;; "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)" did not tie the
;; masked variant to AVX512VL.
;; NOTE(review): <mask_avx512vl_condition> is defined outside this chunk;
;; presumably it is "1" for the unmasked variant and TARGET_AVX512VL for
;; the masked one -- confirm in subst.md.
5152 | (define_insn "fix_truncv4dfv4si2<mask_name>" | |
5153 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
5154 | (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))] | |
5155 | "TARGET_AVX && <mask_avx512vl_condition>" | |
5156 | "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
5157 | [(set_attr "type" "ssecvt") | |
5158 | (set_attr "prefix" "maybe_evex") | |
5159 | (set_attr "mode" "OI")]) | |
5160 | ||
;; Emits vcvttpd2udq{y}: (unsigned_fix:V4SI ...) of a V4DF operand.
;; Enabled for TARGET_AVX512VL && TARGET_AVX512F.
5161 | (define_insn "ufix_truncv4dfv4si2<mask_name>" | |
5162 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
5163 | (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))] | |
5164 | "TARGET_AVX512VL && TARGET_AVX512F" | |
5165 | "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
5166 | [(set_attr "type" "ssecvt") | |
5167 | (set_attr "prefix" "maybe_evex") | |
ed30e0a6 | 5168 | (set_attr "mode" "OI")]) |
5169 | ||
;; Emits vcvttpd2<fixsuffix>qq: (any_fix ...) of a VF2_AVX512VL operand into
;; the matching <sseintvecmode> integer vector.  Enabled for
;; TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>.
05ecc201 | 5170 | (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>" |
5171 | [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") | |
5172 | (any_fix:<sseintvecmode> | |
5173 | (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] | |
5174 | "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>" | |
5175 | "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" | |
5176 | [(set_attr "type" "ssecvt") | |
5177 | (set_attr "prefix" "evex") | |
5178 | (set_attr "mode" "<sseintvecmode2>")]) | |
5179 | ||
;; Emits vcvtpd2qq: UNSPEC_FIX_NOTRUNC of a VF2_AVX512VL operand into the
;; matching <sseintvecmode> integer vector.  Enabled for
;; TARGET_AVX512DQ && <round_mode512bit_condition>.
5180 | (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>" | |
5181 | [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") | |
5182 | (unspec:<sseintvecmode> | |
5183 | [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")] | |
5184 | UNSPEC_FIX_NOTRUNC))] | |
5185 | "TARGET_AVX512DQ && <round_mode512bit_condition>" | |
5186 | "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
5187 | [(set_attr "type" "ssecvt") | |
5188 | (set_attr "prefix" "evex") | |
5189 | (set_attr "mode" "<sseintvecmode2>")]) | |
5190 | ||
;; Emits vcvtpd2uqq: UNSPEC_UNSIGNED_FIX_NOTRUNC of a VF2_AVX512VL operand
;; into the matching <sseintvecmode> integer vector.  Enabled for
;; TARGET_AVX512DQ && <round_mode512bit_condition>.
;; NOTE(review): unlike the signed fix_notrunc pattern, operand 1 uses the
;; nonimmediate_operand predicate rather than <round_nimm_predicate>.
5191 | (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>" | |
5192 | [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") | |
5193 | (unspec:<sseintvecmode> | |
5194 | [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")] | |
5195 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
5196 | "TARGET_AVX512DQ && <round_mode512bit_condition>" | |
5197 | "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
5198 | [(set_attr "type" "ssecvt") | |
5199 | (set_attr "prefix" "evex") | |
5200 | (set_attr "mode" "<sseintvecmode2>")]) | |
5201 | ||
;; Emits vcvttps2<fixsuffix>qq: (any_fix ...) of a VF1_128_256VL operand
;; into the matching <sselongvecmode> integer vector.  Enabled for
;; TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>.
0dc2f097 | 5202 | (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>" |
5203 | [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v") | |
5204 | (any_fix:<sselongvecmode> | |
5205 | (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] | |
5206 | "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>" | |
5207 | "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" | |
5208 | [(set_attr "type" "ssecvt") | |
5209 | (set_attr "prefix" "evex") | |
5210 | (set_attr "mode" "<sseintvecmode3>")]) | |
5211 | ||
;; Emits vcvttps2<fixsuffix>qq for the 128-bit case: (any_fix:V2DI ...) of
;; elements 0 and 1 of a V4SF operand.  Enabled for
;; TARGET_AVX512DQ && TARGET_AVX512VL.
5212 | (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>" | |
5213 | [(set (match_operand:V2DI 0 "register_operand" "=v") | |
5214 | (any_fix:V2DI | |
5215 | (vec_select:V2SF | |
5216 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") | |
5217 | (parallel [(const_int 0) (const_int 1)]))))] | |
5218 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
5219 | "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
5220 | [(set_attr "type" "ssecvt") | |
5221 | (set_attr "prefix" "evex") | |
5222 | (set_attr "mode" "TI")]) | |
5223 | ||
;; Emits vcvttps2udq: (unsigned_fix ...) of a VF1_128_256VL operand into the
;; matching <sseintvecmode> integer vector.  Enabled for TARGET_AVX512VL.
5224 | (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>" | |
5225 | [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") | |
5226 | (unsigned_fix:<sseintvecmode> | |
5227 | (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))] | |
5228 | "TARGET_AVX512VL" | |
5229 | "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
5230 | [(set_attr "type" "ssecvt") | |
5231 | (set_attr "prefix" "evex") | |
5232 | (set_attr "mode" "<sseintvecmode2>")]) | |
5233 | ||
;; Expander producing a V8SI whose low half is (fix:V4SI ...) of a V4DF
;; operand.  The preparation statement sets operands[2] (the upper half) to
;; the zero V4SI constant.
83c4576f | 5234 | (define_expand "avx_cvttpd2dq256_2" |
abd4f58b | 5235 | [(set (match_operand:V8SI 0 "register_operand") |
83c4576f | 5236 | (vec_concat:V8SI |
abd4f58b | 5237 | (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")) |
83c4576f | 5238 | (match_dup 2)))] |
5239 | "TARGET_AVX" | |
5240 | "operands[2] = CONST0_RTX (V4SImode);") | |
5241 | ||
;; V2DF -> V2SI (fix ...) conversion; the V4SI result is the V2SI value
;; concatenated with a zero V2SI constant.  The C output block picks
;; vcvttpd2dq{x} (with mask operand text) when TARGET_AVX, else cvttpd2dq.
0c4232b3 | 5242 | (define_insn "sse2_cvttpd2dq<mask_name>" |
5243 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
2a466fea | 5244 | (vec_concat:V4SI |
0c4232b3 | 5245 | (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) |
5246 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] | |
5247 | "TARGET_SSE2 && <mask_avx512vl_condition>" | |
887423c0 | 5248 | { |
5249 | if (TARGET_AVX) | |
0c4232b3 | 5250 | return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; |
887423c0 | 5251 | else |
5252 | return "cvttpd2dq\t{%1, %0|%0, %1}"; | |
5253 | } | |
2a466fea | 5254 | [(set_attr "type" "ssecvt") |
97436e92 | 5255 | (set_attr "amdfam10_decode" "double") |
68ff067d | 5256 | (set_attr "athlon_decode" "vector") |
887423c0 | 5257 | (set_attr "bdver1_decode" "double") |
5258 | (set_attr "prefix" "maybe_vex") | |
5259 | (set_attr "mode" "TI")]) | |
ed30e0a6 | 5260 | |
;; [v]cvtsd2ss: float_truncate of the V2DF operand 2 is duplicated and
;; merged into V4SF operand 1 with merge mask (const_int 1).  Three
;; alternatives: two non-AVX forms (register / memory source, destination
;; tied to operand 1) and one AVX three-operand form, as selected by the
;; "isa" attribute.
0b7cc9c6 | 5261 | (define_insn "sse2_cvtsd2ss<round_name>" |
e13e1b39 | 5262 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") |
2a466fea | 5263 | (vec_merge:V4SF |
5264 | (vec_duplicate:V4SF | |
5265 | (float_truncate:V2SF | |
0b7cc9c6 | 5266 | (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>"))) |
e13e1b39 | 5267 | (match_operand:V4SF 1 "register_operand" "0,0,v") |
2a466fea | 5268 | (const_int 1)))] |
5269 | "TARGET_SSE2" | |
887423c0 | 5270 | "@ |
5271 | cvtsd2ss\t{%2, %0|%0, %2} | |
c358a059 | 5272 | cvtsd2ss\t{%2, %0|%0, %q2} |
0b7cc9c6 | 5273 | vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}" |
887423c0 | 5274 | [(set_attr "isa" "noavx,noavx,avx") |
5275 | (set_attr "type" "ssecvt") | |
5276 | (set_attr "athlon_decode" "vector,double,*") | |
5277 | (set_attr "amdfam10_decode" "vector,double,*") | |
5278 | (set_attr "bdver1_decode" "direct,direct,*") | |
6470d004 | 5279 | (set_attr "btver2_decode" "double,double,double") |
0b7cc9c6 | 5280 | (set_attr "prefix" "orig,orig,<round_prefix>") |
2a466fea | 5281 | (set_attr "mode" "SF")]) |
79eddd43 | 5282 | |
;; [v]cvtss2sd: float_extend of elements 0 and 1 of V4SF operand 2 is merged
;; into V2DF operand 1 with merge mask (const_int 1).  Three alternatives:
;; two non-AVX forms (register / memory source, destination tied to operand
;; 1) and one AVX three-operand form, as selected by the "isa" attribute.
0b7cc9c6 | 5283 | (define_insn "sse2_cvtss2sd<round_saeonly_name>" |
e13e1b39 | 5284 | [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") |
79eddd43 | 5285 | (vec_merge:V2DF |
2a466fea | 5286 | (float_extend:V2DF |
5287 | (vec_select:V2SF | |
0dc2f097 | 5288 | (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>") |
2a466fea | 5289 | (parallel [(const_int 0) (const_int 1)]))) |
e13e1b39 | 5290 | (match_operand:V2DF 1 "register_operand" "0,0,v") |
79eddd43 | 5291 | (const_int 1)))] |
5292 | "TARGET_SSE2" | |
887423c0 | 5293 | "@ |
5294 | cvtss2sd\t{%2, %0|%0, %2} | |
c358a059 | 5295 | cvtss2sd\t{%2, %0|%0, %k2} |
0b7cc9c6 | 5296 | vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}" |
887423c0 | 5297 | [(set_attr "isa" "noavx,noavx,avx") |
5298 | (set_attr "type" "ssecvt") | |
5299 | (set_attr "amdfam10_decode" "vector,double,*") | |
5300 | (set_attr "athlon_decode" "direct,direct,*") | |
5301 | (set_attr "bdver1_decode" "direct,direct,*") | |
6470d004 | 5302 | (set_attr "btver2_decode" "double,double,double") |
0b7cc9c6 | 5303 | (set_attr "prefix" "orig,orig,<round_saeonly_prefix>") |
5802c0cb | 5304 | (set_attr "mode" "DF")]) |
5305 | ||
;; Emits vcvtpd2ps: float_truncate of a V8DF operand into V8SF.
;; Enabled for TARGET_AVX512F.
be60ab96 | 5306 | (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>" |
697a43f8 | 5307 | [(set (match_operand:V8SF 0 "register_operand" "=v") |
5308 | (float_truncate:V8SF | |
be60ab96 | 5309 | (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))] |
697a43f8 | 5310 | "TARGET_AVX512F" |
be60ab96 | 5311 | "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
697a43f8 | 5312 | [(set_attr "type" "ssecvt") |
5313 | (set_attr "prefix" "evex") | |
5314 | (set_attr "mode" "V8SF")]) | |
5315 | ||
;; Emits vcvtpd2ps{y}: float_truncate of a V4DF operand into V4SF.
;; Enabled for TARGET_AVX && <mask_avx512vl_condition>.
0dc2f097 | 5316 | (define_insn "avx_cvtpd2ps256<mask_name>" |
5317 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
ed30e0a6 | 5318 | (float_truncate:V4SF |
0dc2f097 | 5319 | (match_operand:V4DF 1 "nonimmediate_operand" "vm")))] |
5320 | "TARGET_AVX && <mask_avx512vl_condition>" | |
5321 | "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ed30e0a6 | 5322 | [(set_attr "type" "ssecvt") |
0dc2f097 | 5323 | (set_attr "prefix" "maybe_evex") |
6470d004 | 5324 | (set_attr "btver2_decode" "vector") |
ed30e0a6 | 5325 | (set_attr "mode" "V4SF")]) |
5326 | ||
;; Expander producing a V4SF whose low half is float_truncate of a V2DF
;; operand.  The preparation statement sets operands[2] (the upper half) to
;; the zero V2SF constant.
2a466fea | 5327 | (define_expand "sse2_cvtpd2ps" |
abd4f58b | 5328 | [(set (match_operand:V4SF 0 "register_operand") |
2a466fea | 5329 | (vec_concat:V4SF |
5330 | (float_truncate:V2SF | |
abd4f58b | 5331 | (match_operand:V2DF 1 "nonimmediate_operand")) |
2a466fea | 5332 | (match_dup 2)))] |
5802c0cb | 5333 | "TARGET_SSE2" |
2a466fea | 5334 | "operands[2] = CONST0_RTX (V2SFmode);") |
5802c0cb | 5335 | |
;; Masked expander for the V2DF -> V2SF truncation: the zero-extended V4SF
;; result is vec_merged with V4SF operand 2 under the QImode mask operand 3.
;; The preparation statement sets operands[4] to the zero V2SF constant.
0dc2f097 | 5336 | (define_expand "sse2_cvtpd2ps_mask" |
5337 | [(set (match_operand:V4SF 0 "register_operand") | |
5338 | (vec_merge:V4SF | |
5339 | (vec_concat:V4SF | |
5340 | (float_truncate:V2SF | |
5341 | (match_operand:V2DF 1 "nonimmediate_operand")) | |
5342 | (match_dup 4)) | |
5343 | (match_operand:V4SF 2 "register_operand") | |
5344 | (match_operand:QI 3 "register_operand")))] | |
5345 | "TARGET_SSE2" | |
5346 | "operands[4] = CONST0_RTX (V2SFmode);") | |
5347 | ||
;; Insn for the V2DF -> V2SF truncation concatenated with a const0 operand.
;; The C output block picks vcvtpd2ps{x} when TARGET_AVX, else cvtpd2ps.
;; The mask text uses <mask_operand3> here because operand 2 is already
;; taken by the const0 upper half.
5348 | (define_insn "*sse2_cvtpd2ps<mask_name>" | |
5349 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
2a466fea | 5350 | (vec_concat:V4SF |
5351 | (float_truncate:V2SF | |
0dc2f097 | 5352 | (match_operand:V2DF 1 "nonimmediate_operand" "vm")) |
abd4f58b | 5353 | (match_operand:V2SF 2 "const0_operand")))] |
0dc2f097 | 5354 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
887423c0 | 5355 | { |
5356 | if (TARGET_AVX) | |
0dc2f097 | 5357 | return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"; |
887423c0 | 5358 | else |
5359 | return "cvtpd2ps\t{%1, %0|%0, %1}"; | |
5360 | } | |
2a466fea | 5361 | [(set_attr "type" "ssecvt") |
97436e92 | 5362 | (set_attr "amdfam10_decode" "double") |
68ff067d | 5363 | (set_attr "athlon_decode" "vector") |
887423c0 | 5364 | (set_attr "bdver1_decode" "double") |
5365 | (set_attr "prefix_data16" "1") | |
5366 | (set_attr "prefix" "maybe_vex") | |
5367 | (set_attr "mode" "V4SF")]) | |
79eddd43 | 5368 | |
;; Maps a double-precision vector mode (V8DF, V4DF) to the SF vector mode
;; with the same element count.
6615b722 | 5369 | ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern |
5370 | (define_mode_attr sf2dfmode | |
5371 | [(V8DF "V8SF") (V4DF "V4SF")]) | |
5372 | ||
;; Emits vcvtps2pd: float_extend of a <sf2dfmode> operand into a VF2_512_256
;; destination.  Enabled for TARGET_AVX && <mask_mode512bit_condition>
;; && <round_saeonly_mode512bit_condition>.
dbfe84d5 | 5373 | (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>" |
6615b722 | 5374 | [(set (match_operand:VF2_512_256 0 "register_operand" "=v") |
5375 | (float_extend:VF2_512_256 | |
dbfe84d5 | 5376 | (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] |
5377 | "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" | |
5378 | "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" | |
ed30e0a6 | 5379 | [(set_attr "type" "ssecvt") |
6615b722 | 5380 | (set_attr "prefix" "maybe_vex") |
5381 | (set_attr "mode" "<MODE>")]) | |
ed30e0a6 | 5382 | |
;; Emits vcvtps2pd converting elements 0..3 of a V8SF operand to V4DF.
;; Operand 1 is printed with the %x modifier.  Enabled for TARGET_AVX.
8cedf886 | 5383 | (define_insn "*avx_cvtps2pd256_2" |
5384 | [(set (match_operand:V4DF 0 "register_operand" "=x") | |
5385 | (float_extend:V4DF | |
5386 | (vec_select:V4SF | |
5387 | (match_operand:V8SF 1 "nonimmediate_operand" "xm") | |
887423c0 | 5388 | (parallel [(const_int 0) (const_int 1) |
5389 | (const_int 2) (const_int 3)]))))] | |
8cedf886 | 5390 | "TARGET_AVX" |
5391 | "vcvtps2pd\t{%x1, %0|%0, %x1}" | |
5392 | [(set_attr "type" "ssecvt") | |
5393 | (set_attr "prefix" "vex") | |
5394 | (set_attr "mode" "V4DF")]) | |
5395 | ||
;; Emits vcvtps2pd converting elements 0..7 of a V16SF operand to V8DF.
;; Operand 1 is printed with the %t modifier.  Enabled for TARGET_AVX512F.
697a43f8 | 5396 | (define_insn "vec_unpacks_lo_v16sf" |
5397 | [(set (match_operand:V8DF 0 "register_operand" "=v") | |
5398 | (float_extend:V8DF | |
5399 | (vec_select:V8SF | |
5400 | (match_operand:V16SF 1 "nonimmediate_operand" "vm") | |
5401 | (parallel [(const_int 0) (const_int 1) | |
5402 | (const_int 2) (const_int 3) | |
5403 | (const_int 4) (const_int 5) | |
5404 | (const_int 6) (const_int 7)]))))] | |
5405 | "TARGET_AVX512F" | |
5406 | "vcvtps2pd\t{%t1, %0|%0, %t1}" | |
5407 | [(set_attr "type" "ssecvt") | |
5408 | (set_attr "prefix" "evex") | |
5409 | (set_attr "mode" "V8DF")]) | |
5410 | ||
;; Emits vpmov<ssemodesuffix>2m: UNSPEC_CVTINT2MASK of a VI12_AVX512VL
;; vector register into a mask register ("Yk").  Enabled for
;; TARGET_AVX512BW.
54f53cd0 | 5411 | (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>" |
5412 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
5413 | (unspec:<avx512fmaskmode> | |
5414 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v")] | |
5415 | UNSPEC_CVTINT2MASK))] | |
5416 | "TARGET_AVX512BW" | |
5417 | "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}" | |
5418 | [(set_attr "prefix" "evex") | |
5419 | (set_attr "mode" "<sseinsnmode>")]) | |
5420 | ||
;; Same vpmov<ssemodesuffix>2m pattern for the VI48_AVX512VL modes.
;; Enabled for TARGET_AVX512DQ.
5421 | (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>" | |
5422 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
5423 | (unspec:<avx512fmaskmode> | |
5424 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v")] | |
5425 | UNSPEC_CVTINT2MASK))] | |
5426 | "TARGET_AVX512DQ" | |
5427 | "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}" | |
5428 | [(set_attr "prefix" "evex") | |
5429 | (set_attr "mode" "<sseinsnmode>")]) | |
5430 | ||
;; Expander for mask -> vector conversion on VI12_AVX512VL modes: a
;; vec_merge of operands[2] (set to CONSTM1_RTX) and operands[3] (set to
;; CONST0_RTX) under mask operand 1.  Enabled for TARGET_AVX512BW.
5431 | (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>" | |
5432 | [(set (match_operand:VI12_AVX512VL 0 "register_operand") | |
5433 | (vec_merge:VI12_AVX512VL | |
5434 | (match_dup 2) | |
5435 | (match_dup 3) | |
5436 | (match_operand:<avx512fmaskmode> 1 "register_operand")))] | |
5437 | "TARGET_AVX512BW" | |
5438 | { | |
5439 | operands[2] = CONSTM1_RTX (<MODE>mode); | |
5440 | operands[3] = CONST0_RTX (<MODE>mode); | |
5441 | }) | |
5442 | ||
;; Insn matching the vec_merge of an all-ones vector (operand 2,
;; constm1_operand) and a zero vector (operand 3, const0_operand) under
;; mask register operand 1, for VI12_AVX512VL modes.  Emits
;; vpmovm2<ssemodesuffix>.  Requires TARGET_AVX512BW.
5443 | (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>" | |
5444 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
5445 | (vec_merge:VI12_AVX512VL | |
5446 | (match_operand:VI12_AVX512VL 2 "constm1_operand") | |
5447 | (match_operand:VI12_AVX512VL 3 "const0_operand") | |
5448 | (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))] | |
5449 | "TARGET_AVX512BW" | |
5450 | "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}" | |
5451 | [(set_attr "prefix" "evex") | |
5452 | (set_attr "mode" "<sseinsnmode>")]) | |
5453 | ||
;; Expander for mask-to-vector conversion (VI48_AVX512VL modes).
;; Builds a vec_merge controlled by mask operand 1 that picks between
;; operand 2 and operand 3; the preparation code sets operand 2 to the
;; all-ones vector (CONSTM1_RTX) and operand 3 to the zero vector
;; (CONST0_RTX), so each result element is either -1 or 0.
;; Requires TARGET_AVX512DQ.  The preparation statements use a brace
;; block, matching the VI12 expander of the same name.
5454 | (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>" | |
5455 | [(set (match_operand:VI48_AVX512VL 0 "register_operand") | |
5456 | (vec_merge:VI48_AVX512VL | |
5457 | (match_dup 2) | |
5458 | (match_dup 3) | |
5459 | (match_operand:<avx512fmaskmode> 1 "register_operand")))] | |
5460 | "TARGET_AVX512DQ" | |
5461 | { | |
5462 | operands[2] = CONSTM1_RTX (<MODE>mode); | |
5463 | operands[3] = CONST0_RTX (<MODE>mode); | |
5464 | }) | |
5465 | ||
;; Insn matching the vec_merge of an all-ones vector (operand 2,
;; constm1_operand) and a zero vector (operand 3, const0_operand) under
;; mask register operand 1, for VI48_AVX512VL modes.  Emits
;; vpmovm2<ssemodesuffix>.  Requires TARGET_AVX512DQ.
5466 | (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>" | |
5467 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
5468 | (vec_merge:VI48_AVX512VL | |
5469 | (match_operand:VI48_AVX512VL 2 "constm1_operand") | |
5470 | (match_operand:VI48_AVX512VL 3 "const0_operand") | |
5471 | (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))] | |
5472 | "TARGET_AVX512DQ" | |
5473 | "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}" | |
5474 | [(set_attr "prefix" "evex") | |
5475 | (set_attr "mode" "<sseinsnmode>")]) | |
5476 | ||
;; Widen the low two SFmode elements (indices 0-1) of V4SF operand 1 to
;; DFmode, producing V2DF operand 0.  Emits (v)cvtps2pd.
;; <mask_name>, <mask_operand2> and <mask_avx512vl_condition> are
;; expanded by machinery defined elsewhere in this file.
;; NOTE(review): presumably these generate a masked variant of the
;; pattern -- confirm against the mask subst definitions.
0dc2f097 | 5477 | (define_insn "sse2_cvtps2pd<mask_name>" |
5478 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
2a466fea | 5479 | (float_extend:V2DF |
5480 | (vec_select:V2SF | |
0dc2f097 | 5481 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") |
2a466fea | 5482 | (parallel [(const_int 0) (const_int 1)]))))] |
0dc2f097 | 5483 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
5484 | "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
2a466fea | 5485 | [(set_attr "type" "ssecvt") |
97436e92 | 5486 | (set_attr "amdfam10_decode" "direct") |
68ff067d | 5487 | (set_attr "athlon_decode" "double") |
887423c0 | 5488 | (set_attr "bdver1_decode" "double") |
5489 | (set_attr "prefix_data16" "0") | |
5490 | (set_attr "prefix" "maybe_vex") | |
5491 | (set_attr "mode" "V2DF")]) | |
5802c0cb | 5492 | |
;; Widen the high two SFmode elements of V4SF operand 1 to V2DF.
;; Step 1 selects elements 6,7,2,3 of (concat scratch, operand 1), which
;; places operand 1's elements 2 and 3 in the low half of the scratch
;; (operand 2, a fresh V4SF pseudo).  Step 2 float_extends the low two
;; elements of the scratch into operand 0.  Requires TARGET_SSE2.
2a466fea | 5493 | (define_expand "vec_unpacks_hi_v4sf" |
5494 | [(set (match_dup 2) | |
5495 | (vec_select:V4SF | |
5496 | (vec_concat:V8SF | |
5497 | (match_dup 2) | |
abd4f58b | 5498 | (match_operand:V4SF 1 "nonimmediate_operand")) |
887423c0 | 5499 | (parallel [(const_int 6) (const_int 7) |
5500 | (const_int 2) (const_int 3)]))) | |
abd4f58b | 5501 | (set (match_operand:V2DF 0 "register_operand") |
2a466fea | 5502 | (float_extend:V2DF |
5503 | (vec_select:V2SF | |
5504 | (match_dup 2) | |
5505 | (parallel [(const_int 0) (const_int 1)]))))] | |
5bd1ff1d | 5506 | "TARGET_SSE2" |
5507 | "operands[2] = gen_reg_rtx (V4SFmode);") | |
d5d869ee | 5508 | |
;; Widen the high four SFmode elements (indices 4-7) of V8SF operand 1
;; to V4DF.  The high half is first extracted into a fresh V4SF pseudo
;; (operand 2), which is then float_extended into operand 0.
;; Requires TARGET_AVX.
8cedf886 | 5509 | (define_expand "vec_unpacks_hi_v8sf" |
5510 | [(set (match_dup 2) | |
5511 | (vec_select:V4SF | |
9f8bc6e8 | 5512 | (match_operand:V8SF 1 "register_operand") |
887423c0 | 5513 | (parallel [(const_int 4) (const_int 5) |
5514 | (const_int 6) (const_int 7)]))) | |
abd4f58b | 5515 | (set (match_operand:V4DF 0 "register_operand") |
8cedf886 | 5516 | (float_extend:V4DF |
5517 | (match_dup 2)))] | |
5518 | "TARGET_AVX" | |
887423c0 | 5519 | "operands[2] = gen_reg_rtx (V4SFmode);") |
8cedf886 | 5520 | |
;; Widen the high eight SFmode elements (indices 8-15) of V16SF operand 1
;; to V8DF.  The high half is first extracted into a fresh V8SF pseudo
;; (operand 2), which is then float_extended into operand 0.
;; Requires TARGET_AVX512F.
697a43f8 | 5521 | (define_expand "vec_unpacks_hi_v16sf" |
5522 | [(set (match_dup 2) | |
5523 | (vec_select:V8SF | |
9f8bc6e8 | 5524 | (match_operand:V16SF 1 "register_operand") |
697a43f8 | 5525 | (parallel [(const_int 8) (const_int 9) |
5526 | (const_int 10) (const_int 11) | |
5527 | (const_int 12) (const_int 13) | |
5528 | (const_int 14) (const_int 15)]))) | |
5529 | (set (match_operand:V8DF 0 "register_operand") | |
5530 | (float_extend:V8DF | |
5531 | (match_dup 2)))] | |
5532 | "TARGET_AVX512F" | |
5533 | "operands[2] = gen_reg_rtx (V8SFmode);") | |
5534 | ||
;; Widen the low two SFmode elements (indices 0-1) of V4SF operand 1 to
;; V2DF operand 0.  No preparation code: the template is emitted as is.
;; Requires TARGET_SSE2.
2a466fea | 5535 | (define_expand "vec_unpacks_lo_v4sf" |
abd4f58b | 5536 | [(set (match_operand:V2DF 0 "register_operand") |
2a466fea | 5537 | (float_extend:V2DF |
5538 | (vec_select:V2SF | |
abd4f58b | 5539 | (match_operand:V4SF 1 "nonimmediate_operand") |
2a466fea | 5540 | (parallel [(const_int 0) (const_int 1)]))))] |
5541 | "TARGET_SSE2") | |
5802c0cb | 5542 | |
;; Widen the low four SFmode elements (indices 0-3) of V8SF operand 1 to
;; V4DF operand 0.  No preparation code: the template is emitted as is.
;; Requires TARGET_AVX.
8cedf886 | 5543 | (define_expand "vec_unpacks_lo_v8sf" |
abd4f58b | 5544 | [(set (match_operand:V4DF 0 "register_operand") |
8cedf886 | 5545 | (float_extend:V4DF |
5546 | (vec_select:V4SF | |
abd4f58b | 5547 | (match_operand:V8SF 1 "nonimmediate_operand") |
887423c0 | 5548 | (parallel [(const_int 0) (const_int 1) |
5549 | (const_int 2) (const_int 3)]))))] | |
8cedf886 | 5550 | "TARGET_AVX") |
5551 | ||
;; Maps an integer vector mode to the float vector mode with half as
;; many, twice-as-wide elements: HImode vectors map to SFmode vectors
;; and SImode vectors map to DFmode vectors.  Used as the result mode of
;; the vec_unpack*_float_* expanders.
ded0808e | 5552 | (define_mode_attr sseunpackfltmode |
9abbf9e6 | 5553 | [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") |
5554 | (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")]) | |
ded0808e | 5555 | |
;; Unpack the high half of operand 1 (a VI2_AVX512F mode) and convert it
;; to floating point in <sseunpackfltmode>.  Done in two steps: the
;; vec_unpacks_hi_<mode> expander fills a fresh <sseunpackmode> pseudo,
;; then a FLOAT of that pseudo is stored into operand 0.
;; NOTE(review): vec_unpacks_hi_<mode> is defined elsewhere; presumably
;; it sign-extends the high-half elements -- confirm.
5556 | (define_expand "vec_unpacks_float_hi_<mode>" | |
abd4f58b | 5557 | [(match_operand:<sseunpackfltmode> 0 "register_operand") |
9abbf9e6 | 5558 | (match_operand:VI2_AVX512F 1 "register_operand")] |
5802c0cb | 5559 | "TARGET_SSE2" |
2a466fea | 5560 | { |
ded0808e | 5561 | rtx tmp = gen_reg_rtx (<sseunpackmode>mode); |
5802c0cb | 5562 | |
ded0808e | 5563 | emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1])); |
d1f9b275 | 5564 | emit_insn (gen_rtx_SET (operands[0], |
ded0808e | 5565 | gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); |
2a466fea | 5566 | DONE; |
5567 | }) | |
5802c0cb | 5568 | |
;; Unpack the low half of operand 1 (a VI2_AVX512F mode) and convert it
;; to floating point in <sseunpackfltmode>.  Done in two steps: the
;; vec_unpacks_lo_<mode> expander fills a fresh <sseunpackmode> pseudo,
;; then a FLOAT of that pseudo is stored into operand 0.
;; NOTE(review): vec_unpacks_lo_<mode> is defined elsewhere; presumably
;; it sign-extends the low-half elements -- confirm.
ded0808e | 5569 | (define_expand "vec_unpacks_float_lo_<mode>" |
abd4f58b | 5570 | [(match_operand:<sseunpackfltmode> 0 "register_operand") |
9abbf9e6 | 5571 | (match_operand:VI2_AVX512F 1 "register_operand")] |
76405cce | 5572 | "TARGET_SSE2" |
5573 | { | |
ded0808e | 5574 | rtx tmp = gen_reg_rtx (<sseunpackmode>mode); |
76405cce | 5575 | |
ded0808e | 5576 | emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1])); |
d1f9b275 | 5577 | emit_insn (gen_rtx_SET (operands[0], |
ded0808e | 5578 | gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); |
2a466fea | 5579 | DONE; |
5580 | }) | |
5802c0cb | 5581 | |
;; Unsigned counterpart of vec_unpacks_float_hi_<mode>: the high half of
;; operand 1 is unpacked with vec_unpacku_hi_<mode> into a fresh
;; <sseunpackmode> pseudo, then a (signed) FLOAT of that pseudo is
;; stored into operand 0.
;; NOTE(review): vec_unpacku_hi_<mode> is defined elsewhere; presumably
;; it zero-extends, which would make the signed FLOAT exact for the
;; widened values -- confirm.
ded0808e | 5582 | (define_expand "vec_unpacku_float_hi_<mode>" |
abd4f58b | 5583 | [(match_operand:<sseunpackfltmode> 0 "register_operand") |
9abbf9e6 | 5584 | (match_operand:VI2_AVX512F 1 "register_operand")] |
5802c0cb | 5585 | "TARGET_SSE2" |
2a466fea | 5586 | { |
ded0808e | 5587 | rtx tmp = gen_reg_rtx (<sseunpackmode>mode); |
5802c0cb | 5588 | |
ded0808e | 5589 | emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1])); |
d1f9b275 | 5590 | emit_insn (gen_rtx_SET (operands[0], |
ded0808e | 5591 | gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); |
2a466fea | 5592 | DONE; |
5593 | }) | |
5802c0cb | 5594 | |
;; Unsigned counterpart of vec_unpacks_float_lo_<mode>: the low half of
;; operand 1 is unpacked with vec_unpacku_lo_<mode> into a fresh
;; <sseunpackmode> pseudo, then a (signed) FLOAT of that pseudo is
;; stored into operand 0.
;; NOTE(review): vec_unpacku_lo_<mode> is defined elsewhere; presumably
;; it zero-extends, which would make the signed FLOAT exact for the
;; widened values -- confirm.
ded0808e | 5595 | (define_expand "vec_unpacku_float_lo_<mode>" |
abd4f58b | 5596 | [(match_operand:<sseunpackfltmode> 0 "register_operand") |
9abbf9e6 | 5597 | (match_operand:VI2_AVX512F 1 "register_operand")] |
5802c0cb | 5598 | "TARGET_SSE2" |
2a466fea | 5599 | { |
ded0808e | 5600 | rtx tmp = gen_reg_rtx (<sseunpackmode>mode); |
5802c0cb | 5601 | |
ded0808e | 5602 | emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1])); |
d1f9b275 | 5603 | emit_insn (gen_rtx_SET (operands[0], |
ded0808e | 5604 | gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); |
2a466fea | 5605 | DONE; |
5606 | }) | |
5802c0cb | 5607 | |
;; Convert the high two SImode elements of V4SI operand 1 to V2DF.
;; Step 1 copies elements 2,3,2,3 of operand 1 into a fresh V4SI pseudo
;; (operand 2) so the wanted elements sit in its low half.  Step 2
;; applies a signed FLOAT to the low two elements of that pseudo.
;; Requires TARGET_SSE2.
2a466fea | 5608 | (define_expand "vec_unpacks_float_hi_v4si" |
5609 | [(set (match_dup 2) | |
5610 | (vec_select:V4SI | |
abd4f58b | 5611 | (match_operand:V4SI 1 "nonimmediate_operand") |
887423c0 | 5612 | (parallel [(const_int 2) (const_int 3) |
5613 | (const_int 2) (const_int 3)]))) | |
abd4f58b | 5614 | (set (match_operand:V2DF 0 "register_operand") |
5deb404d | 5615 | (float:V2DF |
2a466fea | 5616 | (vec_select:V2SI |
5617 | (match_dup 2) | |
5618 | (parallel [(const_int 0) (const_int 1)]))))] | |
5bd1ff1d | 5619 | "TARGET_SSE2" |
5620 | "operands[2] = gen_reg_rtx (V4SImode);") | |
5802c0cb | 5621 | |
;; Convert the low two SImode elements (indices 0-1) of V4SI operand 1
;; to V2DF operand 0 with a signed FLOAT.  No preparation code.
;; Requires TARGET_SSE2.
2a466fea | 5622 | (define_expand "vec_unpacks_float_lo_v4si" |
abd4f58b | 5623 | [(set (match_operand:V2DF 0 "register_operand") |
2a466fea | 5624 | (float:V2DF |
5625 | (vec_select:V2SI | |
abd4f58b | 5626 | (match_operand:V4SI 1 "nonimmediate_operand") |
2a466fea | 5627 | (parallel [(const_int 0) (const_int 1)]))))] |
5628 | "TARGET_SSE2") | |
5629 | ||
;; Convert the high four SImode elements (indices 4-7) of V8SI operand 1
;; to V4DF.  The high half is extracted into a fresh V4SI pseudo
;; (operand 2), then converted with a signed FLOAT into operand 0.
;; Requires TARGET_AVX.
8cedf886 | 5630 | (define_expand "vec_unpacks_float_hi_v8si" |
5631 | [(set (match_dup 2) | |
5632 | (vec_select:V4SI | |
abd4f58b | 5633 | (match_operand:V8SI 1 "nonimmediate_operand") |
887423c0 | 5634 | (parallel [(const_int 4) (const_int 5) |
5635 | (const_int 6) (const_int 7)]))) | |
abd4f58b | 5636 | (set (match_operand:V4DF 0 "register_operand") |
5deb404d | 5637 | (float:V4DF |
8cedf886 | 5638 | (match_dup 2)))] |
5639 | "TARGET_AVX" | |
5640 | "operands[2] = gen_reg_rtx (V4SImode);") | |
5641 | ||
;; Convert the low four SImode elements (indices 0-3) of V8SI operand 1
;; to V4DF operand 0 with a signed FLOAT.  No preparation code.
;; Requires TARGET_AVX.
5642 | (define_expand "vec_unpacks_float_lo_v8si" | |
abd4f58b | 5643 | [(set (match_operand:V4DF 0 "register_operand") |
8cedf886 | 5644 | (float:V4DF |
5645 | (vec_select:V4SI | |
abd4f58b | 5646 | (match_operand:V8SI 1 "nonimmediate_operand") |
887423c0 | 5647 | (parallel [(const_int 0) (const_int 1) |
5648 | (const_int 2) (const_int 3)]))))] | |
8cedf886 | 5649 | "TARGET_AVX") |
5650 | ||
;; Convert the high eight SImode elements (indices 8-15) of V16SI
;; operand 1 to V8DF.  The high half is extracted into a fresh V8SI
;; pseudo (operand 2), then converted with a signed FLOAT into
;; operand 0.  Requires TARGET_AVX512F.
697a43f8 | 5651 | (define_expand "vec_unpacks_float_hi_v16si" |
5652 | [(set (match_dup 2) | |
5653 | (vec_select:V8SI | |
5654 | (match_operand:V16SI 1 "nonimmediate_operand") | |
5655 | (parallel [(const_int 8) (const_int 9) | |
5656 | (const_int 10) (const_int 11) | |
5657 | (const_int 12) (const_int 13) | |
5658 | (const_int 14) (const_int 15)]))) | |
5659 | (set (match_operand:V8DF 0 "register_operand") | |
5660 | (float:V8DF | |
5661 | (match_dup 2)))] | |
5662 | "TARGET_AVX512F" | |
5663 | "operands[2] = gen_reg_rtx (V8SImode);") | |
5664 | ||
;; Convert the low eight SImode elements (indices 0-7) of V16SI
;; operand 1 to V8DF operand 0 with a signed FLOAT.  No preparation
;; code.  Requires TARGET_AVX512F.
5665 | (define_expand "vec_unpacks_float_lo_v16si" | |
5666 | [(set (match_operand:V8DF 0 "register_operand") | |
5667 | (float:V8DF | |
5668 | (vec_select:V8SI | |
5669 | (match_operand:V16SI 1 "nonimmediate_operand") | |
5670 | (parallel [(const_int 0) (const_int 1) | |
5671 | (const_int 2) (const_int 3) | |
5672 | (const_int 4) (const_int 5) | |
5673 | (const_int 6) (const_int 7)]))))] | |
5674 | "TARGET_AVX512F") | |
5675 | ||
;; Unsigned conversion of the high two SImode elements of V4SI operand 1
;; to V2DF, built from a signed conversion plus a correction:
;;   op5 = elements 2,3,2,3 of operand 1   (wanted elements in low half)
;;   op6 = signed FLOAT of the low two elements of op5
;;   op7 = (op6 < 0)                        (compare against op3, zero)
;;   op8 = op7 AND op4                      (op4 is a vector of 2**32)
;;   op0 = op6 + op8
;; So 2**32 is added exactly to those elements whose signed conversion
;; came out negative.  Operands 3 and 4 are constants forced into
;; registers; operands 5-8 are fresh pseudos.  Requires TARGET_SSE2.
ea3bb9f4 | 5676 | (define_expand "vec_unpacku_float_hi_v4si" |
5677 | [(set (match_dup 5) | |
5678 | (vec_select:V4SI | |
abd4f58b | 5679 | (match_operand:V4SI 1 "nonimmediate_operand") |
887423c0 | 5680 | (parallel [(const_int 2) (const_int 3) |
5681 | (const_int 2) (const_int 3)]))) | |
ea3bb9f4 | 5682 | (set (match_dup 6) |
5deb404d | 5683 | (float:V2DF |
ea3bb9f4 | 5684 | (vec_select:V2SI |
5685 | (match_dup 5) | |
5686 | (parallel [(const_int 0) (const_int 1)])))) | |
5687 | (set (match_dup 7) | |
5688 | (lt:V2DF (match_dup 6) (match_dup 3))) | |
5689 | (set (match_dup 8) | |
5690 | (and:V2DF (match_dup 7) (match_dup 4))) | |
abd4f58b | 5691 | (set (match_operand:V2DF 0 "register_operand") |
ea3bb9f4 | 5692 | (plus:V2DF (match_dup 6) (match_dup 8)))] |
5bd1ff1d | 5693 | "TARGET_SSE2" |
ea3bb9f4 | 5694 | { |
5695 | REAL_VALUE_TYPE TWO32r; | |
5696 | rtx x; | |
5697 | int i; | |
5698 | ||
5699 | real_ldexp (&TWO32r, &dconst1, 32); | |
5700 | x = const_double_from_real_value (TWO32r, DFmode); | |
5701 | ||
5702 | operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); | |
8cedf886 | 5703 | operands[4] = force_reg (V2DFmode, |
5704 | ix86_build_const_vector (V2DFmode, 1, x)); | |
ea3bb9f4 | 5705 | |
5706 | operands[5] = gen_reg_rtx (V4SImode); | |
33541f98 | 5707 | |
ea3bb9f4 | 5708 | for (i = 6; i < 9; i++) |
5709 | operands[i] = gen_reg_rtx (V2DFmode); | |
5710 | }) | |
5711 | ||
;; Unsigned conversion of the low two SImode elements of V4SI operand 1
;; to V2DF, built from a signed conversion plus a correction:
;;   op5 = signed FLOAT of elements 0-1 of operand 1
;;   op6 = (op5 < 0)                        (compare against op3, zero)
;;   op7 = op6 AND op4                      (op4 is a vector of 2**32)
;;   op0 = op5 + op7
;; So 2**32 is added exactly to those elements whose signed conversion
;; came out negative.  Operands 3 and 4 are constants forced into
;; registers; operands 5-7 are fresh V2DF pseudos.  Requires TARGET_SSE2.
5712 | (define_expand "vec_unpacku_float_lo_v4si" | |
5713 | [(set (match_dup 5) | |
5714 | (float:V2DF | |
5715 | (vec_select:V2SI | |
abd4f58b | 5716 | (match_operand:V4SI 1 "nonimmediate_operand") |
ea3bb9f4 | 5717 | (parallel [(const_int 0) (const_int 1)])))) |
5718 | (set (match_dup 6) | |
5719 | (lt:V2DF (match_dup 5) (match_dup 3))) | |
5720 | (set (match_dup 7) | |
5721 | (and:V2DF (match_dup 6) (match_dup 4))) | |
abd4f58b | 5722 | (set (match_operand:V2DF 0 "register_operand") |
ea3bb9f4 | 5723 | (plus:V2DF (match_dup 5) (match_dup 7)))] |
5724 | "TARGET_SSE2" | |
5725 | { | |
5726 | REAL_VALUE_TYPE TWO32r; | |
5727 | rtx x; | |
5728 | int i; | |
5729 | ||
5730 | real_ldexp (&TWO32r, &dconst1, 32); | |
5731 | x = const_double_from_real_value (TWO32r, DFmode); | |
5732 | ||
5733 | operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); | |
8cedf886 | 5734 | operands[4] = force_reg (V2DFmode, |
5735 | ix86_build_const_vector (V2DFmode, 1, x)); | |
ea3bb9f4 | 5736 | |
5737 | for (i = 5; i < 8; i++) | |
5738 | operands[i] = gen_reg_rtx (V2DFmode); | |
5739 | }) | |
5740 | ||
;; Unsigned conversion of the high half of V8SI operand 1 to V4DF.
;; The body emits everything itself and ends with DONE:
;;   tmp[5] = high V4SI half of operand 1  (gen_vec_extract_hi_v8si)
;;   tmp[2] = signed conversion of tmp[5]  (gen_floatv4siv4df2)
;;   tmp[3] = (tmp[2] < 0)                 (tmp[0] is the zero vector)
;;   tmp[4] = tmp[3] AND tmp[1]            (tmp[1] is a vector of 2**32)
;;   op0    = tmp[2] + tmp[4]
;; So 2**32 is added exactly to those elements whose signed conversion
;; came out negative.  Requires TARGET_AVX.
ded0808e | 5741 | (define_expand "vec_unpacku_float_hi_v8si" |
abd4f58b | 5742 | [(match_operand:V4DF 0 "register_operand") |
5743 | (match_operand:V8SI 1 "register_operand")] | |
ded0808e | 5744 | "TARGET_AVX" |
5745 | { | |
5746 | REAL_VALUE_TYPE TWO32r; | |
5747 | rtx x, tmp[6]; | |
5748 | int i; | |
5749 | ||
5750 | real_ldexp (&TWO32r, &dconst1, 32); | |
5751 | x = const_double_from_real_value (TWO32r, DFmode); | |
5752 | ||
5753 | tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode)); | |
5754 | tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x)); | |
5755 | tmp[5] = gen_reg_rtx (V4SImode); | |
5756 | ||
5757 | for (i = 2; i < 5; i++) | |
5758 | tmp[i] = gen_reg_rtx (V4DFmode); | |
5759 | emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1])); | |
5f813a0a | 5760 | emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5])); |
d1f9b275 | 5761 | emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0]))); |
ded0808e | 5762 | emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1])); |
5763 | emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4])); | |
5764 | DONE; | |
5765 | }) | |
5766 | ||
;; Unsigned conversion of the high half of V16SI operand 1 to V8DF.
;; The body emits everything itself and ends with DONE:
;;   tmp[3] = high V8SI half of operand 1  (gen_vec_extract_hi_v16si)
;;   tmp[2] = signed conversion of tmp[3]  (gen_floatv8siv8df2)
;;   k      = (tmp[2] < 0) as a QImode mask (tmp[0] is the zero vector)
;;   tmp[2] = masked add of tmp[2] and tmp[1] under k, with tmp[2] also
;;            passed as the fourth argument (tmp[1] is a vector of 2**32)
;;   op0    = tmp[2]
;; NOTE(review): gen_addv8df3_mask is defined elsewhere; presumably the
;; fourth argument is the merge source for lanes where k is clear, so
;; only negative lanes get 2**32 added -- confirm.
;; Requires TARGET_AVX512F.
1706116d | 5767 | (define_expand "vec_unpacku_float_hi_v16si" |
5768 | [(match_operand:V8DF 0 "register_operand") | |
5769 | (match_operand:V16SI 1 "register_operand")] | |
5770 | "TARGET_AVX512F" | |
5771 | { | |
5772 | REAL_VALUE_TYPE TWO32r; | |
5773 | rtx k, x, tmp[4]; | |
5774 | ||
5775 | real_ldexp (&TWO32r, &dconst1, 32); | |
5776 | x = const_double_from_real_value (TWO32r, DFmode); | |
5777 | ||
5778 | tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode)); | |
5779 | tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x)); | |
5780 | tmp[2] = gen_reg_rtx (V8DFmode); | |
5781 | tmp[3] = gen_reg_rtx (V8SImode); | |
5782 | k = gen_reg_rtx (QImode); | |
5783 | ||
5784 | emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1])); | |
5785 | emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3])); | |
d1f9b275 | 5786 | emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0]))); |
1706116d | 5787 | emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)); |
5788 | emit_move_insn (operands[0], tmp[2]); | |
5789 | DONE; | |
5790 | }) | |
5791 | ||
;; Unsigned conversion of the low half of V8SI operand 1 to V4DF.
;; The body emits everything itself and ends with DONE:
;;   tmp[2] = result of gen_avx_cvtdq2pd256_2 on operand 1
;;   tmp[3] = (tmp[2] < 0)                 (tmp[0] is the zero vector)
;;   tmp[4] = tmp[3] AND tmp[1]            (tmp[1] is a vector of 2**32)
;;   op0    = tmp[2] + tmp[4]
;; NOTE(review): avx_cvtdq2pd256_2 is defined elsewhere; presumably it
;; is the signed conversion of the low four elements -- confirm.
;; Requires TARGET_AVX.
ded0808e | 5792 | (define_expand "vec_unpacku_float_lo_v8si" |
abd4f58b | 5793 | [(match_operand:V4DF 0 "register_operand") |
5794 | (match_operand:V8SI 1 "nonimmediate_operand")] | |
ded0808e | 5795 | "TARGET_AVX" |
5796 | { | |
5797 | REAL_VALUE_TYPE TWO32r; | |
5798 | rtx x, tmp[5]; | |
5799 | int i; | |
5800 | ||
5801 | real_ldexp (&TWO32r, &dconst1, 32); | |
5802 | x = const_double_from_real_value (TWO32r, DFmode); | |
5803 | ||
5804 | tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode)); | |
5805 | tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x)); | |
5806 | ||
5807 | for (i = 2; i < 5; i++) | |
5808 | tmp[i] = gen_reg_rtx (V4DFmode); | |
5809 | emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1])); | |
d1f9b275 | 5810 | emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0]))); |
ded0808e | 5811 | emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1])); |
5812 | emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4])); | |
5813 | DONE; | |
5814 | }) | |
5815 | ||
;; Unsigned conversion of the low half of V16SI operand 1 to V8DF.
;; The body emits everything itself and ends with DONE:
;;   tmp[2] = result of gen_avx512f_cvtdq2pd512_2 on operand 1
;;   k      = (tmp[2] < 0) as a QImode mask (tmp[0] is the zero vector)
;;   tmp[2] = masked add of tmp[2] and tmp[1] under k, with tmp[2] also
;;            passed as the fourth argument (tmp[1] is a vector of 2**32)
;;   op0    = tmp[2]
;; NOTE(review): avx512f_cvtdq2pd512_2 and addv8df3_mask are defined
;; elsewhere; presumably the former is the signed low-half conversion
;; and the latter leaves lanes with k clear unchanged -- confirm.
;; Requires TARGET_AVX512F.
5220cab6 | 5816 | (define_expand "vec_unpacku_float_lo_v16si" |
5817 | [(match_operand:V8DF 0 "register_operand") | |
5818 | (match_operand:V16SI 1 "nonimmediate_operand")] | |
5819 | "TARGET_AVX512F" | |
5820 | { | |
5821 | REAL_VALUE_TYPE TWO32r; | |
5822 | rtx k, x, tmp[3]; | |
5823 | ||
5824 | real_ldexp (&TWO32r, &dconst1, 32); | |
5825 | x = const_double_from_real_value (TWO32r, DFmode); | |
5826 | ||
5827 | tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode)); | |
5828 | tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x)); | |
5829 | tmp[2] = gen_reg_rtx (V8DFmode); | |
5830 | k = gen_reg_rtx (QImode); | |
5831 | ||
5832 | emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1])); | |
d1f9b275 | 5833 | emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0]))); |
5220cab6 | 5834 | emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)); |
5835 | emit_move_insn (operands[0], tmp[2]); | |
5836 | DONE; | |
5837 | }) | |
5838 | ||
;; Narrow two double-precision vectors (operands 1 and 2, modes of the
;; VF2_512_256 iterator) and pack them into one single-precision vector.
;; Each input is float_truncated into a fresh <sf2dfmode> pseudo
;; (operands 3 and 4); the two halves are then vec_concat'ed into
;; operand 0, operand 1's result first.  Requires TARGET_AVX.
6615b722 | 5839 | (define_expand "vec_pack_trunc_<mode>" |
8cedf886 | 5840 | [(set (match_dup 3) |
6615b722 | 5841 | (float_truncate:<sf2dfmode> |
5842 | (match_operand:VF2_512_256 1 "nonimmediate_operand"))) | |
8cedf886 | 5843 | (set (match_dup 4) |
6615b722 | 5844 | (float_truncate:<sf2dfmode> |
5845 | (match_operand:VF2_512_256 2 "nonimmediate_operand"))) | |
5846 | (set (match_operand:<ssePSmode> 0 "register_operand") | |
5847 | (vec_concat:<ssePSmode> | |
8cedf886 | 5848 | (match_dup 3) |
5849 | (match_dup 4)))] | |
5850 | "TARGET_AVX" | |
5851 | { | |
6615b722 | 5852 | operands[3] = gen_reg_rtx (<sf2dfmode>mode); |
5853 | operands[4] = gen_reg_rtx (<sf2dfmode>mode); | |
8cedf886 | 5854 | }) |
5855 | ||
;; Narrow two V2DF vectors (operands 1 and 2) into one V4SF (operand 0).
;; Two strategies:
;;  - With AVX, when 128-bit AVX is not preferred and the insn is being
;;    optimized for speed: concatenate both inputs into a V4DF pseudo
;;    (operand 1 is forced into a register first) and narrow it with a
;;    single avx_cvtpd2ps256.
;;  - Otherwise: narrow each input separately with sse2_cvtpd2ps into
;;    V4SF pseudos and combine them with sse_movlhps.
2a466fea | 5856 | (define_expand "vec_pack_trunc_v2df" |
abd4f58b | 5857 | [(match_operand:V4SF 0 "register_operand") |
5858 | (match_operand:V2DF 1 "nonimmediate_operand") | |
5859 | (match_operand:V2DF 2 "nonimmediate_operand")] | |
5802c0cb | 5860 | "TARGET_SSE2" |
2a466fea | 5861 | { |
71eeaf66 | 5862 | rtx tmp0, tmp1; |
5802c0cb | 5863 | |
f00377d6 | 5864 | if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) |
71eeaf66 | 5865 | { |
5866 | tmp0 = gen_reg_rtx (V4DFmode); | |
5867 | tmp1 = force_reg (V2DFmode, operands[1]); | |
5802c0cb | 5868 | |
71eeaf66 | 5869 | emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); |
5870 | emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0)); | |
5871 | } | |
5872 | else | |
5873 | { | |
5874 | tmp0 = gen_reg_rtx (V4SFmode); | |
5875 | tmp1 = gen_reg_rtx (V4SFmode); | |
5876 | ||
5877 | emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1])); | |
5878 | emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2])); | |
5879 | emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1)); | |
5880 | } | |
2a466fea | 5881 | DONE; |
5882 | }) | |
8d1e0693 | 5883 | |
;; Convert two V8DF vectors (operands 1 and 2) to signed integers with
;; truncation and pack the results into one V16SI (operand 0).  Each
;; input goes through fix_truncv8dfv8si2 into a V8SI pseudo; the two
;; halves are joined with avx_vec_concatv16si, operand 1's result first.
;; Requires TARGET_AVX512F.
6615b722 | 5884 | (define_expand "vec_pack_sfix_trunc_v8df" |
5885 | [(match_operand:V16SI 0 "register_operand") | |
5886 | (match_operand:V8DF 1 "nonimmediate_operand") | |
5887 | (match_operand:V8DF 2 "nonimmediate_operand")] | |
5888 | "TARGET_AVX512F" | |
5889 | { | |
5890 | rtx r1, r2; | |
5891 | ||
5892 | r1 = gen_reg_rtx (V8SImode); | |
5893 | r2 = gen_reg_rtx (V8SImode); | |
5894 | ||
5895 | emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1])); | |
5896 | emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2])); | |
5897 | emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2)); | |
5898 | DONE; | |
5899 | }) | |
5900 | ||
;; Convert two V4DF vectors (operands 1 and 2) to signed integers with
;; truncation and pack the results into one V8SI (operand 0).  Each
;; input goes through fix_truncv4dfv4si2 into a V4SI pseudo; the two
;; halves are joined with avx_vec_concatv8si, operand 1's result first.
;; Requires TARGET_AVX.
83c4576f | 5901 | (define_expand "vec_pack_sfix_trunc_v4df" |
abd4f58b | 5902 | [(match_operand:V8SI 0 "register_operand") |
5903 | (match_operand:V4DF 1 "nonimmediate_operand") | |
5904 | (match_operand:V4DF 2 "nonimmediate_operand")] | |
83c4576f | 5905 | "TARGET_AVX" |
5906 | { | |
5907 | rtx r1, r2; | |
5908 | ||
71eeaf66 | 5909 | r1 = gen_reg_rtx (V4SImode); |
5910 | r2 = gen_reg_rtx (V4SImode); | |
83c4576f | 5911 | |
71eeaf66 | 5912 | emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1])); |
5913 | emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2])); | |
5914 | emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); | |
83c4576f | 5915 | DONE; |
5916 | }) | |
5917 | ||
;; Convert two V2DF vectors (operands 1 and 2) to signed integers with
;; truncation and pack the results into one V4SI (operand 0).
;; Two strategies:
;;  - With AVX, when 128-bit AVX is not preferred and the insn is being
;;    optimized for speed: concatenate both inputs into a V4DF pseudo
;;    and convert it with a single fix_truncv4dfv4si2.
;;  - Otherwise: convert each input with sse2_cvttpd2dq into a V4SI
;;    pseudo, then interleave the low 64-bit halves of the two results
;;    (viewed as V2DI) and move that, viewed as V4SI, into operand 0.
2a466fea | 5918 | (define_expand "vec_pack_sfix_trunc_v2df" |
abd4f58b | 5919 | [(match_operand:V4SI 0 "register_operand") |
5920 | (match_operand:V2DF 1 "nonimmediate_operand") | |
5921 | (match_operand:V2DF 2 "nonimmediate_operand")] | |
8d1e0693 | 5922 | "TARGET_SSE2" |
2a466fea | 5923 | { |
09e640e6 | 5924 | rtx tmp0, tmp1, tmp2; |
8d1e0693 | 5925 | |
f00377d6 | 5926 | if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) |
71eeaf66 | 5927 | { |
5928 | tmp0 = gen_reg_rtx (V4DFmode); | |
5929 | tmp1 = force_reg (V2DFmode, operands[1]); | |
8d1e0693 | 5930 | |
71eeaf66 | 5931 | emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); |
5932 | emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0)); | |
5933 | } | |
5934 | else | |
5935 | { | |
5936 | tmp0 = gen_reg_rtx (V4SImode); | |
5937 | tmp1 = gen_reg_rtx (V4SImode); | |
09e640e6 | 5938 | tmp2 = gen_reg_rtx (V2DImode); |
71eeaf66 | 5939 | |
5940 | emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1])); | |
5941 | emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2])); | |
09e640e6 | 5942 | emit_insn (gen_vec_interleave_lowv2di (tmp2, |
5943 | gen_lowpart (V2DImode, tmp0), | |
5944 | gen_lowpart (V2DImode, tmp1))); | |
5945 | emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2)); | |
71eeaf66 | 5946 | } |
2a466fea | 5947 | DONE; |
5948 | }) | |
8d1e0693 | 5949 | |
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements, i.e. the result mode of packing two such
;; vectors after conversion to 32-bit integers.
c152f9e5 | 5950 | (define_mode_attr ssepackfltmode |
6615b722 | 5951 | [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")]) |
c152f9e5 | 5952 | |
;; Convert two double-precision vectors (operands 1 and 2, VF2 modes) to
;; unsigned 32-bit integers with truncation and pack the results into
;; operand 0 (<ssepackfltmode>).
;;  - V8DF: each input goes through ufix_truncv8dfv8si2 and the two V8SI
;;    results are joined with avx_vec_concatv16si.
;;  - Other modes: each input is first passed through
;;    ix86_expand_adjust_ufix_to_sfix_si, which returns an adjusted value
;;    (tmp[0], tmp[1]) and stores a second rtx through its pointer
;;    argument (tmp[2], tmp[3]).  The adjusted values are packed with the
;;    signed vec_pack_sfix_trunc_<mode> expander into tmp[4].  The
;;    elements selected from tmp[2]/tmp[3] by
;;    ix86_expand_vec_extract_even_odd (last argument 0) form tmp[5],
;;    and the final result is tmp[4] XOR tmp[5].  When the pack mode is
;;    V8SI and AVX2 is not available, the extraction is done on V8SF
;;    views of the values instead.
;; NOTE(review): the helper is defined outside this file; presumably it
;; biases inputs too large for the signed conversion and returns the
;; matching XOR correction, and the 0 argument presumably selects the
;; even elements -- confirm against the i386 back end.
5953 | (define_expand "vec_pack_ufix_trunc_<mode>" | |
abd4f58b | 5954 | [(match_operand:<ssepackfltmode> 0 "register_operand") |
1706116d | 5955 | (match_operand:VF2 1 "register_operand") |
5956 | (match_operand:VF2 2 "register_operand")] | |
da38df18 | 5957 | "TARGET_SSE2" |
c152f9e5 | 5958 | { |
1706116d | 5959 | if (<MODE>mode == V8DFmode) |
da38df18 | 5960 | { |
1706116d | 5961 | rtx r1, r2; |
5962 | ||
5963 | r1 = gen_reg_rtx (V8SImode); | |
5964 | r2 = gen_reg_rtx (V8SImode); | |
5965 | ||
5966 | emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1])); | |
5967 | emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2])); | |
5968 | emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2)); | |
da38df18 | 5969 | } |
5970 | else | |
5971 | { | |
1706116d | 5972 | rtx tmp[7]; |
5973 | tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); | |
5974 | tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]); | |
5975 | tmp[4] = gen_reg_rtx (<ssepackfltmode>mode); | |
5976 | emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1])); | |
5977 | if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2) | |
5978 | { | |
5979 | tmp[5] = gen_reg_rtx (<ssepackfltmode>mode); | |
5980 | ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0); | |
5981 | } | |
5982 | else | |
5983 | { | |
5984 | tmp[5] = gen_reg_rtx (V8SFmode); | |
5985 | ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]), | |
5986 | gen_lowpart (V8SFmode, tmp[3]), 0); | |
5987 | tmp[5] = gen_lowpart (V8SImode, tmp[5]); | |
5988 | } | |
5989 | tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5], | |
5990 | operands[0], 0, OPTAB_DIRECT); | |
5991 | if (tmp[6] != operands[0]) | |
5992 | emit_move_insn (operands[0], tmp[6]); | |
da38df18 | 5993 | } |
1706116d | 5994 | |
c152f9e5 | 5995 | DONE; |
5996 | }) | |
5997 | ||
;; Convert two V4DF vectors (operands 1 and 2) to signed integers and
;; pack the results into one V8SI (operand 0).  Each input goes through
;; avx_cvtpd2dq256 into a V4SI pseudo; the halves are joined with
;; avx_vec_concatv8si, operand 1's result first.  Requires TARGET_AVX.
;; NOTE(review): unlike the *_trunc_* expanders this uses cvtpd2dq,
;; which presumably rounds per the current rounding mode -- confirm.
83c4576f | 5998 | (define_expand "vec_pack_sfix_v4df" |
abd4f58b | 5999 | [(match_operand:V8SI 0 "register_operand") |
6000 | (match_operand:V4DF 1 "nonimmediate_operand") | |
6001 | (match_operand:V4DF 2 "nonimmediate_operand")] | |
83c4576f | 6002 | "TARGET_AVX" |
6003 | { | |
6004 | rtx r1, r2; | |
6005 | ||
71eeaf66 | 6006 | r1 = gen_reg_rtx (V4SImode); |
6007 | r2 = gen_reg_rtx (V4SImode); | |
83c4576f | 6008 | |
71eeaf66 | 6009 | emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1])); |
6010 | emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2])); | |
6011 | emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); | |
83c4576f | 6012 | DONE; |
6013 | }) | |
6014 | ||
;; Convert two V2DF vectors (operands 1 and 2) to signed integers and
;; pack the results into one V4SI (operand 0).
;; Two strategies:
;;  - With AVX, when 128-bit AVX is not preferred and the insn is being
;;    optimized for speed: concatenate both inputs into a V4DF pseudo
;;    and convert it with a single avx_cvtpd2dq256.
;;  - Otherwise: convert each input with sse2_cvtpd2dq into a V4SI
;;    pseudo, then interleave the low 64-bit halves of the two results
;;    (viewed as V2DI) and move that, viewed as V4SI, into operand 0.
2a466fea | 6015 | (define_expand "vec_pack_sfix_v2df" |
abd4f58b | 6016 | [(match_operand:V4SI 0 "register_operand") |
6017 | (match_operand:V2DF 1 "nonimmediate_operand") | |
6018 | (match_operand:V2DF 2 "nonimmediate_operand")] | |
8d1e0693 | 6019 | "TARGET_SSE2" |
2a466fea | 6020 | { |
09e640e6 | 6021 | rtx tmp0, tmp1, tmp2; |
2a466fea | 6022 | |
f00377d6 | 6023 | if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) |
71eeaf66 | 6024 | { |
6025 | tmp0 = gen_reg_rtx (V4DFmode); | |
6026 | tmp1 = force_reg (V2DFmode, operands[1]); | |
2a466fea | 6027 | |
71eeaf66 | 6028 | emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); |
6029 | emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0)); | |
6030 | } | |
6031 | else | |
6032 | { | |
6033 | tmp0 = gen_reg_rtx (V4SImode); | |
6034 | tmp1 = gen_reg_rtx (V4SImode); | |
09e640e6 | 6035 | tmp2 = gen_reg_rtx (V2DImode); |
71eeaf66 | 6036 | |
6037 | emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1])); | |
6038 | emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2])); | |
09e640e6 | 6039 | emit_insn (gen_vec_interleave_lowv2di (tmp2, |
6040 | gen_lowpart (V2DImode, tmp0), | |
6041 | gen_lowpart (V2DImode, tmp1))); | |
6042 | emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2)); | |
71eeaf66 | 6043 | } |
2a466fea | 6044 | DONE; |
6045 | }) | |
8d1e0693 | 6046 | |
5802c0cb | 6047 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
6048 | ;; | |
2a466fea | 6049 | ;; Parallel single-precision floating point element swizzling |
5802c0cb | 6050 | ;; |
6051 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
6052 | ||
;; Expander wrapper around the sse_movhlps insn.  The template selects
;; elements 6,7,2,3 of (concat operand 1, operand 2), i.e. the result is
;; { op2[2], op2[3], op1[2], op1[3] }.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; sse_movhlps is emitted into that destination, and if it differs
;; from operand 0 the value is copied over afterwards.
2485795e | 6053 | (define_expand "sse_movhlps_exp" |
abd4f58b | 6054 | [(set (match_operand:V4SF 0 "nonimmediate_operand") |
7c839b3f | 6055 | (vec_select:V4SF |
6056 | (vec_concat:V8SF | |
abd4f58b | 6057 | (match_operand:V4SF 1 "nonimmediate_operand") |
6058 | (match_operand:V4SF 2 "nonimmediate_operand")) | |
7c839b3f | 6059 | (parallel [(const_int 6) |
6060 | (const_int 7) | |
6061 | (const_int 2) | |
6062 | (const_int 3)])))] | |
6063 | "TARGET_SSE" | |
cc05a422 | 6064 | { |
6065 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); | |
33541f98 | 6066 | |
cc05a422 | 6067 | emit_insn (gen_sse_movhlps (dst, operands[1], operands[2])); |
6068 | ||
6069 | /* Fix up the destination if needed. */ | |
6070 | if (dst != operands[0]) | |
6071 | emit_move_insn (operands[0], dst); | |
6072 | ||
6073 | DONE; | |
6074 | }) | |
7c839b3f | 6075 | |
;; Result is elements 6,7,2,3 of (concat operand 1, operand 2), i.e.
;; { op2[2], op2[3], op1[2], op1[3] }.  Five alternatives:
;;   0: SSE register form, operand 1 tied to the destination (movhlps)
;;   1: AVX three-operand register form (vmovhlps)
;;   2: SSE, operand 2 in offsettable memory (movlps from %H2)
;;   3: AVX, operand 2 in offsettable memory (vmovlps from %H2)
;;   4: destination in memory and tied to operand 1 ((v)movhps store)
;; The insn condition rejects operands 1 and 2 both being memory.
;; NOTE(review): %H2 presumably addresses the upper 8 bytes of the
;; memory operand -- confirm against the operand printer.
2485795e | 6076 | (define_insn "sse_movhlps" |
45c0368c | 6077 | [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") |
2a466fea | 6078 | (vec_select:V4SF |
6079 | (vec_concat:V8SF | |
45c0368c | 6080 | (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") |
6081 | (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x")) | |
2a466fea | 6082 | (parallel [(const_int 6) |
6083 | (const_int 7) | |
6084 | (const_int 2) | |
6085 | (const_int 3)])))] | |
5c752e47 | 6086 | "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
2a466fea | 6087 | "@ |
6088 | movhlps\t{%2, %0|%0, %2} | |
45c0368c | 6089 | vmovhlps\t{%2, %1, %0|%0, %1, %2} |
2a466fea | 6090 | movlps\t{%H2, %0|%0, %H2} |
45c0368c | 6091 | vmovlps\t{%H2, %1, %0|%0, %1, %H2} |
c358a059 | 6092 | %vmovhps\t{%2, %0|%q0, %2}" |
d1c8b778 | 6093 | [(set_attr "isa" "noavx,avx,noavx,avx,*") |
45c0368c | 6094 | (set_attr "type" "ssemov") |
8c1dfa94 | 6095 | (set_attr "ssememalign" "64") |
45c0368c | 6096 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") |
6097 | (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) | |
5802c0cb | 6098 | |
;; Expander wrapper around the sse_movlhps insn.  The template selects
;; elements 0,1,4,5 of (concat operand 1, operand 2), i.e. the result is
;; { op1[0], op1[1], op2[0], op2[1] }.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; sse_movlhps is emitted into that destination, and if it differs
;; from operand 0 the value is copied over afterwards.
2485795e | 6099 | (define_expand "sse_movlhps_exp" |
abd4f58b | 6100 | [(set (match_operand:V4SF 0 "nonimmediate_operand") |
7c839b3f | 6101 | (vec_select:V4SF |
6102 | (vec_concat:V8SF | |
abd4f58b | 6103 | (match_operand:V4SF 1 "nonimmediate_operand") |
6104 | (match_operand:V4SF 2 "nonimmediate_operand")) | |
7c839b3f | 6105 | (parallel [(const_int 0) |
6106 | (const_int 1) | |
6107 | (const_int 4) | |
6108 | (const_int 5)])))] | |
6109 | "TARGET_SSE" | |
cc05a422 | 6110 | { |
6111 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); | |
33541f98 | 6112 | |
cc05a422 | 6113 | emit_insn (gen_sse_movlhps (dst, operands[1], operands[2])); |
6114 | ||
6115 | /* Fix up the destination if needed. */ | |
6116 | if (dst != operands[0]) | |
6117 | emit_move_insn (operands[0], dst); | |
6118 | ||
6119 | DONE; | |
6120 | }) | |
7c839b3f | 6121 | |
;; Result is elements 0,1,4,5 of (concat operand 1, operand 2), i.e.
;; { op1[0], op1[1], op2[0], op2[1] }.  Five alternatives:
;;   0: SSE register form, operand 1 tied to the destination (movlhps)
;;   1: AVX three-operand register form (vmovlhps)
;;   2: SSE, operand 2 in memory (movhps load)
;;   3: AVX, operand 2 in memory (vmovhps load)
;;   4: destination in offsettable memory and tied to operand 1
;;      ((v)movlps store to %H0)
;; The insn condition defers to ix86_binary_operator_ok.
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the
;; memory destination -- confirm against the operand printer.
2485795e | 6122 | (define_insn "sse_movlhps" |
45c0368c | 6123 | [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") |
2a466fea | 6124 | (vec_select:V4SF |
6125 | (vec_concat:V8SF | |
45c0368c | 6126 | (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") |
c358a059 | 6127 | (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x")) |
2a466fea | 6128 | (parallel [(const_int 0) |
6129 | (const_int 1) | |
6130 | (const_int 4) | |
6131 | (const_int 5)])))] | |
6132 | "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)" | |
6133 | "@ | |
6134 | movlhps\t{%2, %0|%0, %2} | |
45c0368c | 6135 | vmovlhps\t{%2, %1, %0|%0, %1, %2} |
c358a059 | 6136 | movhps\t{%2, %0|%0, %q2} |
6137 | vmovhps\t{%2, %1, %0|%0, %1, %q2} | |
45c0368c | 6138 | %vmovlps\t{%2, %H0|%H0, %2}" |
d1c8b778 | 6139 | [(set_attr "isa" "noavx,avx,noavx,avx,*") |
45c0368c | 6140 | (set_attr "type" "ssemov") |
8c1dfa94 | 6141 | (set_attr "ssememalign" "64") |
45c0368c | 6142 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") |
6143 | (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) | |
5802c0cb | 6144 | |
;; 512-bit vunpckhps.  From (concat operand 1, operand 2) it selects,
;; for each group of four elements, elements 2 and 3 of operand 1
;; interleaved with elements 2 and 3 of operand 2 (indices 2,18,3,19,
;; then the same pattern offset by 4, 8 and 12).
;; <mask_codefor>, <mask_name> and <mask_operand3> are expanded by
;; machinery defined elsewhere in this file.  Requires TARGET_AVX512F.
5220cab6 | 6145 | (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>" |
697a43f8 | 6146 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
6147 | (vec_select:V16SF | |
6148 | (vec_concat:V32SF | |
6149 | (match_operand:V16SF 1 "register_operand" "v") | |
6150 | (match_operand:V16SF 2 "nonimmediate_operand" "vm")) | |
6151 | (parallel [(const_int 2) (const_int 18) | |
6152 | (const_int 3) (const_int 19) | |
6153 | (const_int 6) (const_int 22) | |
6154 | (const_int 7) (const_int 23) | |
6155 | (const_int 10) (const_int 26) | |
6156 | (const_int 11) (const_int 27) | |
6157 | (const_int 14) (const_int 30) | |
6158 | (const_int 15) (const_int 31)])))] | |
6159 | "TARGET_AVX512F" | |
5220cab6 | 6160 | "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 6161 | [(set_attr "type" "sselog") |
6162 | (set_attr "prefix" "evex") | |
6163 | (set_attr "mode" "V16SF")]) | |
6164 | ||
d6e05290 | 6165 | ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
;; 256-bit vunpckhps.  From (concat operand 1, operand 2) it selects
;; indices 2,10,3,11 and 6,14,7,15: elements 2 and 3 of each four-element
;; group of operand 1 interleaved with the same elements of operand 2.
;; <mask_name>, <mask_operand3> and <mask_avx512vl_condition> are
;; expanded by machinery defined elsewhere in this file.
ee780bf5 | 6166 | (define_insn "avx_unpckhps256<mask_name>" |
6167 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
ed30e0a6 | 6168 | (vec_select:V8SF |
6169 | (vec_concat:V16SF | |
ee780bf5 | 6170 | (match_operand:V8SF 1 "register_operand" "v") |
6171 | (match_operand:V8SF 2 "nonimmediate_operand" "vm")) | |
5e56456b | 6172 | (parallel [(const_int 2) (const_int 10) |
6173 | (const_int 3) (const_int 11) | |
6174 | (const_int 6) (const_int 14) | |
6175 | (const_int 7) (const_int 15)])))] | |
ee780bf5 | 6176 | "TARGET_AVX && <mask_avx512vl_condition>" |
6177 | "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
ed30e0a6 | 6178 | [(set_attr "type" "sselog") |
6179 | (set_attr "prefix" "vex") | |
6180 | (set_attr "mode" "V8SF")]) | |
6181 | ||
;; Full-width interleave of the high halves of two V8SF vectors:
;; operand 0 = { a4, b4, a5, b5, a6, b6, a7, b7 } where a is operand 1
;; and b is operand 2.  Built in three steps:
;;   op3 = indices 0,8,1,9,4,12,5,13 of (concat a, b)
;;       = { a0, b0, a1, b1, a4, b4, a5, b5 }
;;   op4 = indices 2,10,3,11,6,14,7,15 of (concat a, b)
;;       = { a2, b2, a3, b3, a6, b6, a7, b7 }
;;   op0 = indices 4-7 and 12-15 of (concat op3, op4), i.e. the upper
;;         four elements of op3 followed by the upper four of op4.
;; Operands 3 and 4 are fresh V8SF pseudos.  Requires TARGET_AVX.
;; The input operands carry predicates only, like the other expanders
;; in this file.
8cedf886 | 6182 | (define_expand "vec_interleave_highv8sf" |
6183 | [(set (match_dup 3) | |
6184 | (vec_select:V8SF | |
6185 | (vec_concat:V16SF | |
6186 | (match_operand:V8SF 1 "register_operand") | |
6187 | (match_operand:V8SF 2 "nonimmediate_operand")) | |
6188 | (parallel [(const_int 0) (const_int 8) | |
6189 | (const_int 1) (const_int 9) | |
6190 | (const_int 4) (const_int 12) | |
6191 | (const_int 5) (const_int 13)]))) | |
6192 | (set (match_dup 4) | |
6193 | (vec_select:V8SF | |
6194 | (vec_concat:V16SF | |
6195 | (match_dup 1) | |
6196 | (match_dup 2)) | |
6197 | (parallel [(const_int 2) (const_int 10) | |
6198 | (const_int 3) (const_int 11) | |
6199 | (const_int 6) (const_int 14) | |
6200 | (const_int 7) (const_int 15)]))) | |
abd4f58b | 6201 | (set (match_operand:V8SF 0 "register_operand") |
33d0986a | 6202 | (vec_select:V8SF |
6203 | (vec_concat:V16SF | |
8cedf886 | 6204 | (match_dup 3) |
33d0986a | 6205 | (match_dup 4)) |
6206 | (parallel [(const_int 4) (const_int 5) | |
6207 | (const_int 6) (const_int 7) | |
6208 | (const_int 12) (const_int 13) | |
6209 | (const_int 14) (const_int 15)])))] | |
8cedf886 | 6210 | "TARGET_AVX" |
6211 | { | |
6212 | operands[3] = gen_reg_rtx (V8SFmode); | |
6213 | operands[4] = gen_reg_rtx (V8SFmode); | |
6214 | }) | |
6215 | ||
;; 128-bit high interleave: the result is elements 2 and 3 of operand 1, each
;; followed by the element at the same position of operand 2 (selection
;; 2,6,3,7 of the concatenation).  Two alternatives, selected by the "isa"
;; attribute:
;;   0 (noavx): two-operand unpckhps, operand 1 tied to the destination;
;;   1 (avx):   three-operand vunpckhps.
;; Enabled by TARGET_SSE together with <mask_avx512vl_condition> (defined
;; outside this chunk).
ee780bf5 | 6216 | (define_insn "vec_interleave_highv4sf<mask_name>" |
6217 | [(set (match_operand:V4SF 0 "register_operand" "=x,v") | |
2a466fea | 6218 | (vec_select:V4SF |
6219 | (vec_concat:V8SF | |
ee780bf5 | 6220 | (match_operand:V4SF 1 "register_operand" "0,v") |
6221 | (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm")) | |
2a466fea | 6222 | (parallel [(const_int 2) (const_int 6) |
6223 | (const_int 3) (const_int 7)])))] | |
ee780bf5 | 6224 | "TARGET_SSE && <mask_avx512vl_condition>" |
45c0368c | 6225 | "@ |
6226 | unpckhps\t{%2, %0|%0, %2} | |
ee780bf5 | 6227 | vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
45c0368c | 6228 | [(set_attr "isa" "noavx,avx") |
6229 | (set_attr "type" "sselog") | |
6230 | (set_attr "prefix" "orig,vex") | |
2a466fea | 6231 | (set_attr "mode" "V4SF")]) |
5802c0cb | 6232 | |
;; 512-bit vunpcklps, enabled by TARGET_AVX512F.  From the 32-element
;; concatenation of operands 1 and 2 the result takes elements 0, 1, 4, 5,
;; 8, 9, 12 and 13 of operand 1, each followed by the element sixteen
;; positions later, i.e. the element at the same position in operand 2.
;; NOTE(review): the <mask_...> substitutions are defined outside this chunk;
;; presumably they produce the masked variant -- confirm.
5220cab6 | 6233 | (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>" |
697a43f8 | 6234 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
6235 | (vec_select:V16SF | |
6236 | (vec_concat:V32SF | |
6237 | (match_operand:V16SF 1 "register_operand" "v") | |
6238 | (match_operand:V16SF 2 "nonimmediate_operand" "vm")) | |
6239 | (parallel [(const_int 0) (const_int 16) | |
6240 | (const_int 1) (const_int 17) | |
6241 | (const_int 4) (const_int 20) | |
6242 | (const_int 5) (const_int 21) | |
6243 | (const_int 8) (const_int 24) | |
6244 | (const_int 9) (const_int 25) | |
6245 | (const_int 12) (const_int 28) | |
6246 | (const_int 13) (const_int 29)])))] | |
6247 | "TARGET_AVX512F" | |
5220cab6 | 6248 | "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 6249 | [(set_attr "type" "sselog") |
6250 | (set_attr "prefix" "evex") | |
6251 | (set_attr "mode" "V16SF")]) | |
6252 | ||
d6e05290 | 6253 | ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
;; 256-bit vunpcklps.  From the 16-element concatenation of operands 1 and 2
;; the result takes elements 0, 1, 4 and 5 of operand 1, each followed by
;; the element eight positions later (the same position in operand 2).
;; Enabled by TARGET_AVX together with <mask_avx512vl_condition>, which is
;; defined outside this chunk.
ee780bf5 | 6254 | (define_insn "avx_unpcklps256<mask_name>" |
6255 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
ed30e0a6 | 6256 | (vec_select:V8SF |
6257 | (vec_concat:V16SF | |
ee780bf5 | 6258 | (match_operand:V8SF 1 "register_operand" "v") |
6259 | (match_operand:V8SF 2 "nonimmediate_operand" "vm")) | |
5e56456b | 6260 | (parallel [(const_int 0) (const_int 8) |
6261 | (const_int 1) (const_int 9) | |
6262 | (const_int 4) (const_int 12) | |
6263 | (const_int 5) (const_int 13)])))] | |
ee780bf5 | 6264 | "TARGET_AVX && <mask_avx512vl_condition>" |
6265 | "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
ed30e0a6 | 6266 | [(set_attr "type" "sselog") |
6267 | (set_attr "prefix" "vex") | |
6268 | (set_attr "mode" "V8SF")]) | |
6269 | ||
;; Explicitly masked 128-bit vunpcklps, enabled by TARGET_AVX512VL.  The low
;; interleave of operands 1 and 2 (selection 0,4,1,5 of their concatenation)
;; is merged with operand 3 under the QImode mask in operand 4 (constraint
;; "Yk").  Operand 3 is either tied to the destination ("0") or a constant
;; accepted by constraint "C".
;; NOTE(review): "C" is presumably the zero vector and %N3 presumably prints
;; the zero-masking suffix in that case -- confirm against i386.c.
ee780bf5 | 6270 | (define_insn "unpcklps128_mask" |
6271 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
6272 | (vec_merge:V4SF | |
6273 | (vec_select:V4SF | |
6274 | (vec_concat:V8SF | |
6275 | (match_operand:V4SF 1 "register_operand" "v") | |
6276 | (match_operand:V4SF 2 "nonimmediate_operand" "vm")) | |
6277 | (parallel [(const_int 0) (const_int 4) | |
6278 | (const_int 1) (const_int 5)])) | |
6279 | (match_operand:V4SF 3 "vector_move_operand" "0C") | |
6280 | (match_operand:QI 4 "register_operand" "Yk")))] | |
6281 | "TARGET_AVX512VL" | |
6282 | "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
6283 | [(set_attr "type" "sselog") | |
6284 | (set_attr "prefix" "evex") | |
6285 | (set_attr "mode" "V4SF")]) | |
6286 | ||
;; Expander for a full-width low interleave of two V8SF vectors (TARGET_AVX).
;; It emits three sets using two fresh V8SF pseudos (operands 3 and 4):
;;   1. operand 3 = selection 0,8,1,9,4,12,5,13 of (op1, op2), the same
;;      selection avx_unpcklps256 in this file uses;
;;   2. operand 4 = selection 2,10,3,11,6,14,7,15 of (op1, op2), the same
;;      selection avx_unpckhps256 uses;
;;   3. operand 0 = elements 0-3 of operand 3 followed by elements 0-3 of
;;      operand 4 (indices 0-3 and 8-11 of their concatenation).
;; With op1 = a0..a7 and op2 = b0..b7 this yields a0 b0 a1 b1 a2 b2 a3 b3.
8cedf886 | 6287 | (define_expand "vec_interleave_lowv8sf" |
6288 | [(set (match_dup 3) | |
6289 | (vec_select:V8SF | |
6290 | (vec_concat:V16SF | |
6291 | (match_operand:V8SF 1 "register_operand" "x") | |
6292 | (match_operand:V8SF 2 "nonimmediate_operand" "xm")) | |
6293 | (parallel [(const_int 0) (const_int 8) | |
6294 | (const_int 1) (const_int 9) | |
6295 | (const_int 4) (const_int 12) | |
6296 | (const_int 5) (const_int 13)]))) | |
6297 | (set (match_dup 4) | |
6298 | (vec_select:V8SF | |
6299 | (vec_concat:V16SF | |
6300 | (match_dup 1) | |
6301 | (match_dup 2)) | |
6302 | (parallel [(const_int 2) (const_int 10) | |
6303 | (const_int 3) (const_int 11) | |
6304 | (const_int 6) (const_int 14) | |
6305 | (const_int 7) (const_int 15)]))) | |
abd4f58b | 6306 | (set (match_operand:V8SF 0 "register_operand") |
33d0986a | 6307 | (vec_select:V8SF |
6308 | (vec_concat:V16SF | |
8cedf886 | 6309 | (match_dup 3) |
33d0986a | 6310 | (match_dup 4)) |
6311 | (parallel [(const_int 0) (const_int 1) | |
6312 | (const_int 2) (const_int 3) | |
6313 | (const_int 8) (const_int 9) | |
6314 | (const_int 10) (const_int 11)])))] | |
8cedf886 | 6315 | "TARGET_AVX" |
6316 | { | |
6317 | operands[3] = gen_reg_rtx (V8SFmode); | |
6318 | operands[4] = gen_reg_rtx (V8SFmode); | |
6319 | }) | |
6320 | ||
;; 128-bit low interleave (TARGET_SSE): the result is elements 0 and 1 of
;; operand 1, each followed by the element at the same position of operand 2
;; (selection 0,4,1,5 of the concatenation).  Two alternatives, selected by
;; the "isa" attribute:
;;   0 (noavx): two-operand unpcklps, operand 1 tied to the destination;
;;   1 (avx):   three-operand vunpcklps.
d6e05290 | 6321 | (define_insn "vec_interleave_lowv4sf" |
45c0368c | 6322 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
2a466fea | 6323 | (vec_select:V4SF |
6324 | (vec_concat:V8SF | |
45c0368c | 6325 | (match_operand:V4SF 1 "register_operand" "0,x") |
6326 | (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) | |
2a466fea | 6327 | (parallel [(const_int 0) (const_int 4) |
6328 | (const_int 1) (const_int 5)])))] | |
6329 | "TARGET_SSE" | |
45c0368c | 6330 | "@ |
6331 | unpcklps\t{%2, %0|%0, %2} | |
6332 | vunpcklps\t{%2, %1, %0|%0, %1, %2}" | |
6333 | [(set_attr "isa" "noavx,avx") | |
6334 | (set_attr "type" "sselog") | |
6335 | (set_attr "prefix" "orig,vex") | |
2a466fea | 6336 | (set_attr "mode" "V4SF")]) |
5802c0cb | 6337 | |
2a466fea | 6338 | ;; These are modeled with the same vec_concat as the others so that we |
6339 | ;; capture users of shufps that can use the new instructions | |
;; 256-bit vmovshdup: operand 1 is concatenated with itself and elements
;; 1,1,3,3,5,5,7,7 are selected, so every odd-indexed element of operand 1
;; appears twice in the result.  Enabled by TARGET_AVX together with
;; <mask_avx512vl_condition> (defined outside this chunk).
adea432f | 6340 | (define_insn "avx_movshdup256<mask_name>" |
6341 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
ed30e0a6 | 6342 | (vec_select:V8SF |
6343 | (vec_concat:V16SF | |
adea432f | 6344 | (match_operand:V8SF 1 "nonimmediate_operand" "vm") |
ed30e0a6 | 6345 | (match_dup 1)) |
6346 | (parallel [(const_int 1) (const_int 1) | |
6347 | (const_int 3) (const_int 3) | |
6348 | (const_int 5) (const_int 5) | |
6349 | (const_int 7) (const_int 7)])))] | |
adea432f | 6350 | "TARGET_AVX && <mask_avx512vl_condition>" |
6351 | "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ed30e0a6 | 6352 | [(set_attr "type" "sse") |
6353 | (set_attr "prefix" "vex") | |
6354 | (set_attr "mode" "V8SF")]) | |
6355 | ||
;; 128-bit movshdup / vmovshdup.  Operand 1 is concatenated with itself and
;; elements 1,1,7,7 are selected.  Both halves of the concatenation are the
;; same operand, so index 7 is element 3 of operand 1 and the result is
;; elements 1,1,3,3 of operand 1.  Enabled by TARGET_SSE3 together with
;; <mask_avx512vl_condition> (defined outside this chunk).
adea432f | 6356 | (define_insn "sse3_movshdup<mask_name>" |
6357 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
2a466fea | 6358 | (vec_select:V4SF |
6359 | (vec_concat:V8SF | |
adea432f | 6360 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") |
2a466fea | 6361 | (match_dup 1)) |
6362 | (parallel [(const_int 1) | |
6363 | (const_int 1) | |
6364 | (const_int 7) | |
6365 | (const_int 7)])))] | |
adea432f | 6366 | "TARGET_SSE3 && <mask_avx512vl_condition>" |
6367 | "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
2a466fea | 6368 | [(set_attr "type" "sse") |
1f346cbc | 6369 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 6370 | (set_attr "prefix" "maybe_vex") |
2a466fea | 6371 | (set_attr "mode" "V4SF")]) |
5802c0cb | 6372 | |
;; 512-bit vmovshdup, enabled by TARGET_AVX512F: operand 1 is concatenated
;; with itself and each odd index 1,3,...,15 is selected twice, so every
;; odd-indexed element of operand 1 appears twice in the result.
;; NOTE(review): the <mask_...> substitutions are defined outside this chunk;
;; presumably they produce the masked variant -- confirm.
5220cab6 | 6373 | (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>" |
697a43f8 | 6374 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
6375 | (vec_select:V16SF | |
6376 | (vec_concat:V32SF | |
6377 | (match_operand:V16SF 1 "nonimmediate_operand" "vm") | |
6378 | (match_dup 1)) | |
6379 | (parallel [(const_int 1) (const_int 1) | |
6380 | (const_int 3) (const_int 3) | |
6381 | (const_int 5) (const_int 5) | |
6382 | (const_int 7) (const_int 7) | |
6383 | (const_int 9) (const_int 9) | |
6384 | (const_int 11) (const_int 11) | |
6385 | (const_int 13) (const_int 13) | |
6386 | (const_int 15) (const_int 15)])))] | |
6387 | "TARGET_AVX512F" | |
5220cab6 | 6388 | "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
697a43f8 | 6389 | [(set_attr "type" "sse") |
6390 | (set_attr "prefix" "evex") | |
6391 | (set_attr "mode" "V16SF")]) | |
6392 | ||
;; 256-bit vmovsldup: operand 1 is concatenated with itself and elements
;; 0,0,2,2,4,4,6,6 are selected, so every even-indexed element of operand 1
;; appears twice in the result.  Enabled by TARGET_AVX together with
;; <mask_avx512vl_condition> (defined outside this chunk).
adea432f | 6393 | (define_insn "avx_movsldup256<mask_name>" |
6394 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
ed30e0a6 | 6395 | (vec_select:V8SF |
6396 | (vec_concat:V16SF | |
adea432f | 6397 | (match_operand:V8SF 1 "nonimmediate_operand" "vm") |
ed30e0a6 | 6398 | (match_dup 1)) |
6399 | (parallel [(const_int 0) (const_int 0) | |
6400 | (const_int 2) (const_int 2) | |
6401 | (const_int 4) (const_int 4) | |
6402 | (const_int 6) (const_int 6)])))] | |
adea432f | 6403 | "TARGET_AVX && <mask_avx512vl_condition>" |
6404 | "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ed30e0a6 | 6405 | [(set_attr "type" "sse") |
6406 | (set_attr "prefix" "vex") | |
6407 | (set_attr "mode" "V8SF")]) | |
6408 | ||
;; 128-bit movsldup / vmovsldup.  Operand 1 is concatenated with itself and
;; elements 0,0,6,6 are selected.  Both halves of the concatenation are the
;; same operand, so index 6 is element 2 of operand 1 and the result is
;; elements 0,0,2,2 of operand 1.  Enabled by TARGET_SSE3 together with
;; <mask_avx512vl_condition> (defined outside this chunk).
adea432f | 6409 | (define_insn "sse3_movsldup<mask_name>" |
6410 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
2a466fea | 6411 | (vec_select:V4SF |
6412 | (vec_concat:V8SF | |
adea432f | 6413 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") |
2a466fea | 6414 | (match_dup 1)) |
6415 | (parallel [(const_int 0) | |
6416 | (const_int 0) | |
6417 | (const_int 6) | |
6418 | (const_int 6)])))] | |
adea432f | 6419 | "TARGET_SSE3 && <mask_avx512vl_condition>" |
6420 | "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
2a466fea | 6421 | [(set_attr "type" "sse") |
1f346cbc | 6422 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 6423 | (set_attr "prefix" "maybe_vex") |
2a466fea | 6424 | (set_attr "mode" "V4SF")]) |
5802c0cb | 6425 | |
;; 512-bit vmovsldup, enabled by TARGET_AVX512F: operand 1 is concatenated
;; with itself and each even index 0,2,...,14 is selected twice, so every
;; even-indexed element of operand 1 appears twice in the result.
;; NOTE(review): the <mask_...> substitutions are defined outside this chunk;
;; presumably they produce the masked variant -- confirm.
5220cab6 | 6426 | (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>" |
697a43f8 | 6427 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
6428 | (vec_select:V16SF | |
6429 | (vec_concat:V32SF | |
6430 | (match_operand:V16SF 1 "nonimmediate_operand" "vm") | |
6431 | (match_dup 1)) | |
6432 | (parallel [(const_int 0) (const_int 0) | |
6433 | (const_int 2) (const_int 2) | |
6434 | (const_int 4) (const_int 4) | |
6435 | (const_int 6) (const_int 6) | |
6436 | (const_int 8) (const_int 8) | |
6437 | (const_int 10) (const_int 10) | |
6438 | (const_int 12) (const_int 12) | |
6439 | (const_int 14) (const_int 14)])))] | |
6440 | "TARGET_AVX512F" | |
5220cab6 | 6441 | "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
697a43f8 | 6442 | [(set_attr "type" "sse") |
6443 | (set_attr "prefix" "evex") | |
6444 | (set_attr "mode" "V16SF")]) | |
6445 | ||
;; Expander for 256-bit shufps taking the shuffle control as one immediate
;; (operand 3).  The immediate is split into four 2-bit fields which become
;; the eight explicit selector indices of avx_shufps256_1:
;;   bits 1:0 and 3:2 -> indices 0..3 (operand 1, low lane);
;;   bits 5:4 and 7:6 -> the field value + 8 (operand 2, low lane);
;;   the same four fields again with +4 and +12 for the high lane.
;; The expander emits that insn directly and finishes with DONE.
;; <mask_expand4_name> and <mask_expand4_args> are defined outside this
;; chunk.
bb2fa3d8 | 6446 | (define_expand "avx_shufps256<mask_expand4_name>" |
abd4f58b | 6447 | [(match_operand:V8SF 0 "register_operand") |
6448 | (match_operand:V8SF 1 "register_operand") | |
6449 | (match_operand:V8SF 2 "nonimmediate_operand") | |
6450 | (match_operand:SI 3 "const_int_operand")] | |
ed30e0a6 | 6451 | "TARGET_AVX" |
6452 | { | |
6453 | int mask = INTVAL (operands[3]); | |
bb2fa3d8 | 6454 | emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0], |
6455 | operands[1], | |
6456 | operands[2], | |
6457 | GEN_INT ((mask >> 0) & 3), | |
6458 | GEN_INT ((mask >> 2) & 3), | |
6459 | GEN_INT (((mask >> 4) & 3) + 8), | |
6460 | GEN_INT (((mask >> 6) & 3) + 8), | |
6461 | GEN_INT (((mask >> 0) & 3) + 4), | |
6462 | GEN_INT (((mask >> 2) & 3) + 4), | |
6463 | GEN_INT (((mask >> 4) & 3) + 12), | |
6464 | GEN_INT (((mask >> 6) & 3) + 12) | |
6465 | <mask_expand4_args>)); | |
ed30e0a6 | 6466 | DONE; |
6467 | }) | |
6468 | ||
6469 | ;; One bit in mask selects 2 elements. | |
;; 256-bit vshufps with the selection spelled out as eight indices into the
;; 16-element concatenation of operands 1 and 2 (operands 3-10; the
;; predicate names give the accepted range of each).  The insn condition
;; additionally requires operands 7-10 to equal operands 3-6 plus 4, i.e. the
;; high lane must repeat the low-lane selection.  The output code rebuilds
;; the 8-bit immediate from operands 3-6 (operands 5 and 6 are rebased by -8)
;; and stores it back into operands[3] before printing.
bb2fa3d8 | 6470 | (define_insn "avx_shufps256_1<mask_name>" |
6471 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
ed30e0a6 | 6472 | (vec_select:V8SF |
6473 | (vec_concat:V16SF | |
bb2fa3d8 | 6474 | (match_operand:V8SF 1 "register_operand" "v") |
6475 | (match_operand:V8SF 2 "nonimmediate_operand" "vm")) | |
abd4f58b | 6476 | (parallel [(match_operand 3 "const_0_to_3_operand" ) |
6477 | (match_operand 4 "const_0_to_3_operand" ) | |
6478 | (match_operand 5 "const_8_to_11_operand" ) | |
6479 | (match_operand 6 "const_8_to_11_operand" ) | |
6480 | (match_operand 7 "const_4_to_7_operand" ) | |
6481 | (match_operand 8 "const_4_to_7_operand" ) | |
6482 | (match_operand 9 "const_12_to_15_operand") | |
6483 | (match_operand 10 "const_12_to_15_operand")])))] | |
ed30e0a6 | 6484 | "TARGET_AVX |
bb2fa3d8 | 6485 | && <mask_avx512vl_condition> |
ed30e0a6 | 6486 | && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4) |
6487 | && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4) | |
6488 | && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4) | |
6489 | && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))" | |
6490 | { | |
6491 | int mask; | |
6492 | mask = INTVAL (operands[3]); | |
6493 | mask |= INTVAL (operands[4]) << 2; | |
6494 | mask |= (INTVAL (operands[5]) - 8) << 4; | |
6495 | mask |= (INTVAL (operands[6]) - 8) << 6; | |
6496 | operands[3] = GEN_INT (mask); | |
6497 | ||
bb2fa3d8 | 6498 | return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; |
ed30e0a6 | 6499 | } |
77aff08f | 6500 | [(set_attr "type" "sseshuf") |
00a0e418 | 6501 | (set_attr "length_immediate" "1") |
bb2fa3d8 | 6502 | (set_attr "prefix" "<mask_prefix>") |
ed30e0a6 | 6503 | (set_attr "mode" "V8SF")]) |
6504 | ||
;; Expander for 128-bit shufps taking the shuffle control as one immediate
;; (operand 3).  The immediate is split into four 2-bit fields: bits 1:0 and
;; 3:2 become indices 0..3 (operand 1) and bits 5:4 and 7:6 become the field
;; value + 4 (operand 2).  The four indices are passed to
;; gen_sse_shufps_v4sf<mask_expand4_name> and the expander finishes with DONE.
;; <mask_expand4_name> and <mask_expand4_args> are defined outside this
;; chunk.
bb2fa3d8 | 6505 | (define_expand "sse_shufps<mask_expand4_name>" |
abd4f58b | 6506 | [(match_operand:V4SF 0 "register_operand") |
6507 | (match_operand:V4SF 1 "register_operand") | |
6508 | (match_operand:V4SF 2 "nonimmediate_operand") | |
6509 | (match_operand:SI 3 "const_int_operand")] | |
2a466fea | 6510 | "TARGET_SSE" |
6511 | { | |
6512 | int mask = INTVAL (operands[3]); | |
bb2fa3d8 | 6513 | emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0], |
6514 | operands[1], | |
6515 | operands[2], | |
6516 | GEN_INT ((mask >> 0) & 3), | |
6517 | GEN_INT ((mask >> 2) & 3), | |
6518 | GEN_INT (((mask >> 4) & 3) + 4), | |
6519 | GEN_INT (((mask >> 6) & 3) + 4) | |
6520 | <mask_expand4_args>)); | |
2a466fea | 6521 | DONE; |
6522 | }) | |
5802c0cb | 6523 | |
;; Explicitly masked 128-bit vshufps, enabled by TARGET_AVX512VL.  Operands
;; 3-6 are the four selector indices into the concatenation of operands 1 and
;; 2 (two in 0..3, two in 4..7 according to the predicate names).  The
;; shuffled vector is merged with operand 7 under the QImode mask in operand
;; 8 (constraint "Yk").  The output code packs the selectors into the 8-bit
;; immediate (operands 5 and 6 rebased by -4) and stores it in operands[3].
;; NOTE(review): constraint "C" on operand 7 is presumably the zero vector
;; and %N7 presumably prints the zero-masking suffix -- confirm.
bb2fa3d8 | 6524 | (define_insn "sse_shufps_v4sf_mask" |
6525 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
6526 | (vec_merge:V4SF | |
6527 | (vec_select:V4SF | |
6528 | (vec_concat:V8SF | |
6529 | (match_operand:V4SF 1 "register_operand" "v") | |
6530 | (match_operand:V4SF 2 "nonimmediate_operand" "vm")) | |
6531 | (parallel [(match_operand 3 "const_0_to_3_operand") | |
6532 | (match_operand 4 "const_0_to_3_operand") | |
6533 | (match_operand 5 "const_4_to_7_operand") | |
6534 | (match_operand 6 "const_4_to_7_operand")])) | |
6535 | (match_operand:V4SF 7 "vector_move_operand" "0C") | |
6536 | (match_operand:QI 8 "register_operand" "Yk")))] | |
6537 | "TARGET_AVX512VL" | |
6538 | { | |
6539 | int mask = 0; | |
6540 | mask |= INTVAL (operands[3]) << 0; | |
6541 | mask |= INTVAL (operands[4]) << 2; | |
6542 | mask |= (INTVAL (operands[5]) - 4) << 4; | |
6543 | mask |= (INTVAL (operands[6]) - 4) << 6; | |
6544 | operands[3] = GEN_INT (mask); | |
6545 | ||
6546 | return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}"; | |
6547 | } | |
6548 | [(set_attr "type" "sseshuf") | |
6549 | (set_attr "length_immediate" "1") | |
6550 | (set_attr "prefix" "evex") | |
6551 | (set_attr "mode" "V4SF")]) | |
6552 | ||
;; Unmasked 128-bit shufps for every mode of the VI4F_128 iterator (defined
;; outside this chunk), enabled by TARGET_SSE.  Operands 3-6 are the four
;; selector indices into the concatenation of operands 1 and 2.  The output
;; code packs them into the 8-bit immediate (operands 5 and 6 rebased by -4),
;; stores it in operands[3] and then picks the template by alternative:
;;   0 (noavx): two-operand shufps, operand 1 tied to the destination;
;;   1 (avx):   three-operand vshufps.
56c7c824 | 6553 | (define_insn "sse_shufps_<mode>" |
6fe5844b | 6554 | [(set (match_operand:VI4F_128 0 "register_operand" "=x,x") |
6555 | (vec_select:VI4F_128 | |
63d5e521 | 6556 | (vec_concat:<ssedoublevecmode> |
6fe5844b | 6557 | (match_operand:VI4F_128 1 "register_operand" "0,x") |
6558 | (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm")) | |
abd4f58b | 6559 | (parallel [(match_operand 3 "const_0_to_3_operand") |
6560 | (match_operand 4 "const_0_to_3_operand") | |
6561 | (match_operand 5 "const_4_to_7_operand") | |
6562 | (match_operand 6 "const_4_to_7_operand")])))] | |
2a466fea | 6563 | "TARGET_SSE" |
6564 | { | |
6565 | int mask = 0; | |
6566 | mask |= INTVAL (operands[3]) << 0; | |
6567 | mask |= INTVAL (operands[4]) << 2; | |
6568 | mask |= (INTVAL (operands[5]) - 4) << 4; | |
6569 | mask |= (INTVAL (operands[6]) - 4) << 6; | |
6570 | operands[3] = GEN_INT (mask); | |
5802c0cb | 6571 |
45c0368c | 6572 | switch (which_alternative) |
6573 | { | |
6574 | case 0: | |
6575 | return "shufps\t{%3, %2, %0|%0, %2, %3}"; | |
6576 | case 1: | |
6577 | return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | |
6578 | default: | |
6579 | gcc_unreachable (); | |
6580 | } | |
2a466fea | 6581 | } |
45c0368c | 6582 | [(set_attr "isa" "noavx,avx") |
77aff08f | 6583 | (set_attr "type" "sseshuf") |
00a0e418 | 6584 | (set_attr "length_immediate" "1") |
45c0368c | 6585 | (set_attr "prefix" "orig,vex") |
2a466fea | 6586 | (set_attr "mode" "V4SF")]) |
5802c0cb | 6587 | |
;; Extract the high half (elements 2 and 3) of V4SF operand 1 into V2SF
;; operand 0 (TARGET_SSE).  Three alternatives:
;;   0: register -> memory, movhps;
;;   1: register -> register, movhlps;
;;   2: offsettable memory -> register, movlps using %H1.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-print code in i386.c.
2a466fea | 6588 | (define_insn "sse_storehps" |
6589 | [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") | |
6590 | (vec_select:V2SF | |
6591 | (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") | |
6592 | (parallel [(const_int 2) (const_int 3)])))] | |
6593 | "TARGET_SSE" | |
6594 | "@ | |
c358a059 | 6595 | %vmovhps\t{%1, %0|%q0, %1} |
ed30e0a6 | 6596 | %vmovhlps\t{%1, %d0|%d0, %1} |
6597 | %vmovlps\t{%H1, %d0|%d0, %H1}" | |
2a466fea | 6598 | [(set_attr "type" "ssemov") |
8c1dfa94 | 6599 | (set_attr "ssememalign" "64") |
ed30e0a6 | 6600 | (set_attr "prefix" "maybe_vex") |
2a466fea | 6601 | (set_attr "mode" "V2SF,V4SF,V2SF")]) |
6602 | ||
;; Expander wrapper around sse_loadhps (TARGET_SSE).  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the destination
;; to use; sse_loadhps is emitted into that destination, and the result is
;; copied to operands[0] when the returned destination differs from it.
2485795e | 6603 | (define_expand "sse_loadhps_exp" |
abd4f58b | 6604 | [(set (match_operand:V4SF 0 "nonimmediate_operand") |
7c839b3f | 6605 | (vec_concat:V4SF |
6606 | (vec_select:V2SF | |
abd4f58b | 6607 | (match_operand:V4SF 1 "nonimmediate_operand") |
7c839b3f | 6608 | (parallel [(const_int 0) (const_int 1)])) |
abd4f58b | 6609 | (match_operand:V2SF 2 "nonimmediate_operand")))] |
7c839b3f | 6610 | "TARGET_SSE" |
cc05a422 | 6611 | { |
6612 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); | |
33541f98 | 6613 |
cc05a422 | 6614 | emit_insn (gen_sse_loadhps (dst, operands[1], operands[2])); |
6615 | ||
6616 | /* Fix up the destination if needed. */ | |
6617 | if (dst != operands[0]) | |
6618 | emit_move_insn (operands[0], dst); | |
6619 | ||
6620 | DONE; | |
6621 | }) | |
7c839b3f | 6622 | |
;; Replace the high half of a V4SF: the result is elements 0 and 1 of
;; operand 1 followed by the two elements of V2SF operand 2 (TARGET_SSE).
;; Five alternatives (see the "isa" attribute for AVX gating):
;;   0/1: operand 2 in memory    -> movhps / vmovhps;
;;   2/3: operand 2 in a register -> movlhps / vmovlhps;
;;   4:   destination is offsettable memory tied to operand 1, operand 2 in
;;        a register -> movlps to %H0.
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the memory
;; destination -- confirm against the operand-print code in i386.c.
2485795e | 6623 | (define_insn "sse_loadhps" |
45c0368c | 6624 | [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") |
5802c0cb | 6625 | (vec_concat:V4SF |
2a466fea | 6626 | (vec_select:V2SF |
45c0368c | 6627 | (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") |
2a466fea | 6628 | (parallel [(const_int 0) (const_int 1)])) |
45c0368c | 6629 | (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))] |
5c752e47 | 6630 | "TARGET_SSE" |
2a466fea | 6631 | "@ |
c358a059 | 6632 | movhps\t{%2, %0|%0, %q2} |
6633 | vmovhps\t{%2, %1, %0|%0, %1, %q2} | |
2a466fea | 6634 | movlhps\t{%2, %0|%0, %2} |
45c0368c | 6635 | vmovlhps\t{%2, %1, %0|%0, %1, %2} |
6636 | %vmovlps\t{%2, %H0|%H0, %2}" | |
d1c8b778 | 6637 | [(set_attr "isa" "noavx,avx,noavx,avx,*") |
45c0368c | 6638 | (set_attr "type" "ssemov") |
8c1dfa94 | 6639 | (set_attr "ssememalign" "64") |
45c0368c | 6640 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") |
6641 | (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")]) | |
ed30e0a6 | 6642 | |
;; Extract the low half (elements 0 and 1) of V4SF operand 1 into V2SF
;; operand 0 (TARGET_SSE).  Three alternatives:
;;   0: register -> memory, movlps;
;;   1: register -> register, movaps (mode attribute V4SF);
;;   2: memory -> register, movlps.
2a466fea | 6643 | (define_insn "sse_storelps" |
45c0368c | 6644 | [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") |
2a466fea | 6645 | (vec_select:V2SF |
45c0368c | 6646 | (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m") |
2a466fea | 6647 | (parallel [(const_int 0) (const_int 1)])))] |
5c752e47 | 6648 | "TARGET_SSE" |
2a466fea | 6649 | "@ |
c358a059 | 6650 | %vmovlps\t{%1, %0|%q0, %1} |
45c0368c | 6651 | %vmovaps\t{%1, %0|%0, %1} |
c358a059 | 6652 | %vmovlps\t{%1, %d0|%d0, %q1}" |
2a466fea | 6653 | [(set_attr "type" "ssemov") |
45c0368c | 6654 | (set_attr "prefix" "maybe_vex") |
2a466fea | 6655 | (set_attr "mode" "V2SF,V4SF,V2SF")]) |
6656 | ||
;; Expander wrapper around sse_loadlps (TARGET_SSE).  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the destination
;; to use; sse_loadlps is emitted into that destination, and the result is
;; copied to operands[0] when the returned destination differs from it.
2485795e | 6657 | (define_expand "sse_loadlps_exp" |
abd4f58b | 6658 | [(set (match_operand:V4SF 0 "nonimmediate_operand") |
7c839b3f | 6659 | (vec_concat:V4SF |
abd4f58b | 6660 | (match_operand:V2SF 2 "nonimmediate_operand") |
7c839b3f | 6661 | (vec_select:V2SF |
abd4f58b | 6662 | (match_operand:V4SF 1 "nonimmediate_operand") |
7c839b3f | 6663 | (parallel [(const_int 2) (const_int 3)]))))] |
6664 | "TARGET_SSE" | |
cc05a422 | 6665 | { |
6666 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); | |
33541f98 | 6667 |
cc05a422 | 6668 | emit_insn (gen_sse_loadlps (dst, operands[1], operands[2])); |
6669 | ||
6670 | /* Fix up the destination if needed. */ | |
6671 | if (dst != operands[0]) | |
6672 | emit_move_insn (operands[0], dst); | |
6673 | ||
6674 | DONE; | |
6675 | }) | |
7c839b3f | 6676 | |
;; Replace the low half of a V4SF: the result is the two elements of V2SF
;; operand 2 followed by elements 2 and 3 of operand 1 (TARGET_SSE).
;; Five alternatives (see the "isa" attribute for AVX gating):
;;   0/1: all operands in registers -> shufps / vshufps with immediate 0xe4
;;        (in alternative 0 operand 2 is tied to the destination);
;;   2/3: operand 2 in memory -> movlps / vmovlps;
;;   4:   destination is memory tied to operand 1, operand 2 in a register
;;        -> movlps store.
2485795e | 6677 | (define_insn "sse_loadlps" |
45c0368c | 6678 | [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") |
2a466fea | 6679 | (vec_concat:V4SF |
50c10b91 | 6680 | (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x") |
5802c0cb | 6681 | (vec_select:V2SF |
45c0368c | 6682 | (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0") |
2a466fea | 6683 | (parallel [(const_int 2) (const_int 3)]))))] |
5c752e47 | 6684 | "TARGET_SSE" |
2a466fea | 6685 | "@ |
6686 | shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} | |
45c0368c | 6687 | vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4} |
c358a059 | 6688 | movlps\t{%2, %0|%0, %q2} |
6689 | vmovlps\t{%2, %1, %0|%0, %1, %q2} | |
6690 | %vmovlps\t{%2, %0|%q0, %2}" | |
d1c8b778 | 6691 | [(set_attr "isa" "noavx,avx,noavx,avx,*") |
77aff08f | 6692 | (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov") |
8c1dfa94 | 6693 | (set_attr "ssememalign" "64") |
45c0368c | 6694 | (set_attr "length_immediate" "1,1,*,*,*") |
6695 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") | |
6696 | (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) | |
ed30e0a6 | 6697 | |
;; movss between registers (TARGET_SSE): a vec_merge of operand 2 and
;; operand 1 with mask (const_int 1), i.e. element 0 comes from operand 2 and
;; the remaining elements from operand 1.  Two alternatives:
;;   0 (noavx): two-operand movss, operand 1 tied to the destination;
;;   1 (avx):   three-operand vmovss.
2a466fea | 6698 | (define_insn "sse_movss" |
45c0368c | 6699 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
2a466fea | 6700 | (vec_merge:V4SF |
45c0368c | 6701 | (match_operand:V4SF 2 "register_operand" " x,x") |
6702 | (match_operand:V4SF 1 "register_operand" " 0,x") | |
2a466fea | 6703 | (const_int 1)))] |
6704 | "TARGET_SSE" | |
45c0368c | 6705 | "@ |
6706 | movss\t{%2, %0|%0, %2} | |
6707 | vmovss\t{%2, %1, %0|%0, %1, %2}" | |
6708 | [(set_attr "isa" "noavx,avx") | |
6709 | (set_attr "type" "ssemov") | |
6710 | (set_attr "prefix" "orig,vex") | |
2a466fea | 6711 | (set_attr "mode" "SF")]) |
bb8107e7 | 6712 | |
;; Broadcast element 0 of V4SF register operand 1 to every element of the
;; destination, for each mode of the VF1_128_256 iterator (defined outside
;; this chunk).  Emitted as vbroadcastss; requires TARGET_AVX2.
eea5ff47 | 6713 | (define_insn "avx2_vec_dup<mode>" |
03ae25dc | 6714 | [(set (match_operand:VF1_128_256 0 "register_operand" "=x") |
6715 | (vec_duplicate:VF1_128_256 | |
5deb404d | 6716 | (vec_select:SF |
6717 | (match_operand:V4SF 1 "register_operand" "x") | |
6718 | (parallel [(const_int 0)]))))] | |
6719 | "TARGET_AVX2" | |
6720 | "vbroadcastss\t{%1, %0|%0, %1}" | |
6721 | [(set_attr "type" "sselog1") | |
6722 | (set_attr "prefix" "vex") | |
eea5ff47 | 6723 | (set_attr "mode" "<MODE>")]) |
5deb404d | 6724 | |
;; Broadcast element 0 of V8SF register operand 1 to all eight elements of
;; the V8SF destination.  Emitted as vbroadcastss; requires TARGET_AVX2.
;; NOTE(review): %x1 presumably prints the 128-bit (xmm) name of the source
;; register -- confirm against the operand-print code in i386.c.
541e350d | 6725 | (define_insn "avx2_vec_dupv8sf_1" |
6726 | [(set (match_operand:V8SF 0 "register_operand" "=x") | |
6727 | (vec_duplicate:V8SF | |
6728 | (vec_select:SF | |
6729 | (match_operand:V8SF 1 "register_operand" "x") | |
6730 | (parallel [(const_int 0)]))))] | |
6731 | "TARGET_AVX2" | |
6732 | "vbroadcastss\t{%x1, %0|%0, %x1}" | |
6733 | [(set_attr "type" "sselog1") | |
6734 | (set_attr "prefix" "vex") | |
6735 | (set_attr "mode" "V8SF")]) | |
6736 | ||
;; Broadcast element 0 of a 512-bit register (operand 1) to every element of
;; the destination, for each mode of the VF_512 iterator (defined outside
;; this chunk).  Requires TARGET_AVX512F; the mnemonic suffix comes from the
;; <bcstscalarsuff> attribute, also defined outside this chunk.
;; NOTE(review): %x1 presumably prints the 128-bit (xmm) name of the source
;; register -- confirm against the operand-print code in i386.c.
05e7532b | 6737 | (define_insn "avx512f_vec_dup<mode>_1" |
6738 | [(set (match_operand:VF_512 0 "register_operand" "=v") | |
6739 | (vec_duplicate:VF_512 | |
6740 | (vec_select:<ssescalarmode> | |
6741 | (match_operand:VF_512 1 "register_operand" "v") | |
6742 | (parallel [(const_int 0)]))))] | |
6743 | "TARGET_AVX512F" | |
6744 | "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}" | |
6745 | [(set_attr "type" "sselog1") | |
6746 | (set_attr "prefix" "evex") | |
6747 | (set_attr "mode" "<MODE>")]) | |
6748 | ||
d3d9aac1 | 6749 | ;; Although insertps takes register source, we prefer |
6750 | ;; unpcklps with register source since it is shorter. | |
;; Build a V2SF from two SF values when TARGET_SSE4_1 is enabled; the insn
;; condition rejects the case where both inputs are in memory.  Nine
;; alternatives, in constraint order:
;;   0-2: both inputs in SSE registers -> unpcklps / unpcklps / vunpcklps;
;;   3-5: operand 2 in memory -> insertps / insertps / vinsertps with
;;        immediate 0x10;
;;   6:   operand 1 in memory, operand 2 matching "C" -> movss load;
;;   7:   MMX registers -> punpckldq;
;;   8:   operand 1 in memory, operand 2 matching "C", MMX destination ->
;;        movd load.
;; NOTE(review): constraint "C" is presumably the zero constant -- confirm
;; against constraints.md.
6751 | (define_insn "*vec_concatv2sf_sse4_1" | |
0a281fd0 | 6752 | [(set (match_operand:V2SF 0 "register_operand" |
6753 | "=Yr,*x,x,Yr,*x,x,x,*y ,*y") | |
d3d9aac1 | 6754 | (vec_concat:V2SF |
0a281fd0 | 6755 | (match_operand:SF 1 "nonimmediate_operand" |
6756 | " 0, 0,x, 0,0, x,m, 0 , m") | |
6757 | (match_operand:SF 2 "vector_move_operand" | |
6758 | " Yr,*x,x, m,m, m,C,*ym, C")))] | |
6759 | "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | |
d3d9aac1 | 6760 | "@ |
0a32b282 | 6761 | unpcklps\t{%2, %0|%0, %2} |
d3d9aac1 | 6762 | unpcklps\t{%2, %0|%0, %2} |
45c0368c | 6763 | vunpcklps\t{%2, %1, %0|%0, %1, %2} |
d3d9aac1 | 6764 | insertps\t{$0x10, %2, %0|%0, %2, 0x10} |
0a32b282 | 6765 | insertps\t{$0x10, %2, %0|%0, %2, 0x10} |
45c0368c | 6766 | vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10} |
6767 | %vmovss\t{%1, %0|%0, %1} | |
d3d9aac1 | 6768 | punpckldq\t{%2, %0|%0, %2} |
6769 | movd\t{%1, %0|%0, %1}" | |
0a32b282 | 6770 | [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") |
6771 | (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") | |
6772 | (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*") | |
6773 | (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*") | |
6774 | (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*") | |
6775 | (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") | |
6776 | (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")]) | |
d3d9aac1 | 6777 | |
2a466fea | 6778 | ;; ??? In theory we can match memory for the MMX alternative, but allowing |
6779 | ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE | |
6780 | ;; alternatives pretty much forces the MMX alternative to be chosen. | |
;; Build a V2SF from two SF values with plain TARGET_SSE.  Four alternatives:
;;   0: both inputs in SSE registers, operand 1 tied to the destination ->
;;      unpcklps;
;;   1: operand 1 in memory, operand 2 matching "C" -> movss load;
;;   2: MMX registers, operand 1 tied to the destination -> punpckldq;
;;   3: operand 1 in memory, operand 2 matching "C", MMX destination -> movd.
;; NOTE(review): "C" together with reg_or_0_operand is presumably the zero
;; constant -- confirm against constraints.md / predicates.md.
b4a46c88 | 6781 | (define_insn "*vec_concatv2sf_sse" |
2a466fea | 6782 | [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y") |
6783 | (vec_concat:V2SF | |
6784 | (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m") | |
6785 | (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))] | |
6786 | "TARGET_SSE" | |
6787 | "@ | |
6788 | unpcklps\t{%2, %0|%0, %2} | |
6789 | movss\t{%1, %0|%0, %1} | |
6790 | punpckldq\t{%2, %0|%0, %2} | |
6791 | movd\t{%1, %0|%0, %1}" | |
6792 | [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") | |
6793 | (set_attr "mode" "V4SF,SF,DI,DI")]) | |
8aa4e142 | 6794 | |
;; Build a V4SF from two V2SF halves (TARGET_SSE): operand 1 supplies the low
;; half and operand 2 the high half.  Four alternatives:
;;   0/1: operand 2 in a register -> movlhps / vmovlhps;
;;   2/3: operand 2 in memory     -> movhps / vmovhps.
;; In the non-AVX alternatives operand 1 is tied to the destination.
d3d379e7 | 6795 | (define_insn "*vec_concatv4sf" |
45c0368c | 6796 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x") |
2a466fea | 6797 | (vec_concat:V4SF |
45c0368c | 6798 | (match_operand:V2SF 1 "register_operand" " 0,x,0,x") |
6799 | (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))] | |
2a466fea | 6800 | "TARGET_SSE" |
6801 | "@ | |
6802 | movlhps\t{%2, %0|%0, %2} | |
45c0368c | 6803 | vmovlhps\t{%2, %1, %0|%0, %1, %2} |
c358a059 | 6804 | movhps\t{%2, %0|%0, %q2} |
6805 | vmovhps\t{%2, %1, %0|%0, %1, %q2}" | |
45c0368c | 6806 | [(set_attr "isa" "noavx,avx,noavx,avx") |
6807 | (set_attr "type" "ssemov") | |
6808 | (set_attr "prefix" "orig,vex,orig,vex") | |
6809 | (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) | |
8aa4e142 | 6810 | |
;; Vector initialisation expander for every mode of the V_128 iterator
;; (defined outside this chunk), enabled by TARGET_SSE.  All work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument, the destination register and the initialiser operand.
c262fafb | 6811 | (define_expand "vec_init<mode>" |
abd4f58b | 6812 | [(match_operand:V_128 0 "register_operand") |
6813 | (match_operand 1)] | |
2a466fea | 6814 | "TARGET_SSE" |
8aa4e142 | 6815 | { |
2a466fea | 6816 | ix86_expand_vector_init (false, operands[0], operands[1]); |
8aa4e142 | 6817 | DONE; |
6818 | }) | |
6819 | ||
45c0368c | 6820 | ;; Avoid combining registers from different units in a single alternative, |
6821 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
;; Set element 0 of a 128-bit four-element vector (VI4F_128 iterator, defined
;; outside this chunk): a vec_merge, with mask (const_int 1), of scalar
;; operand 2 duplicated across the vector and of operand 1, which supplies
;; the remaining elements.  Thirteen alternatives, in constraint order:
;;   0-1:   operand 1 matching "C", scalar in an SSE register -> insertps
;;          with immediate 0xe;
;;   2:     operand 1 "C", scalar in memory -> scalar move load;
;;   3:     operand 1 "C", scalar in a general register -> movd;
;;   4-6:   movss / movss / vmovss;
;;   7-9:   scalar in a general register or memory -> pinsrd / pinsrd /
;;          vpinsrd with immediate 0;
;;   10-12: memory destination tied to operand 1; the "#" template means the
;;          insn is to be split rather than output directly (the define_split
;;          after sse4_1_insertps in this file matches the memory-destination
;;          form).
;; The "isa" attribute restricts each alternative to the matching ISA level.
;; NOTE(review): constraint "C" is presumably the zero vector, which would
;; make alternatives 0-3 "scalar in element 0, zeros elsewhere" -- confirm.
1e541240 | 6822 | (define_insn "vec_set<mode>_0" |
6fe5844b | 6823 | [(set (match_operand:VI4F_128 0 "nonimmediate_operand" |
43483afb | 6824 | "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m") |
6fe5844b | 6825 | (vec_merge:VI4F_128 |
6826 | (vec_duplicate:VI4F_128 | |
45c0368c | 6827 | (match_operand:<ssescalarmode> 2 "general_operand" |
43483afb | 6828 | " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF")) |
6fe5844b | 6829 | (match_operand:VI4F_128 1 "vector_move_operand" |
0a32b282 | 6830 | " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0") |
04e14b44 | 6831 | (const_int 1)))] |
1e541240 | 6832 | "TARGET_SSE" |
04e14b44 | 6833 | "@ |
0a32b282 | 6834 | %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} |
45c0368c | 6835 | %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} |
6836 | %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2} | |
6837 | %vmovd\t{%2, %0|%0, %2} | |
04e14b44 | 6838 | movss\t{%2, %0|%0, %2} |
1e541240 | 6839 | movss\t{%2, %0|%0, %2} |
45c0368c | 6840 | vmovss\t{%2, %1, %0|%0, %1, %2} |
04e14b44 | 6841 | pinsrd\t{$0, %2, %0|%0, %2, 0} |
0a32b282 | 6842 | pinsrd\t{$0, %2, %0|%0, %2, 0} |
45c0368c | 6843 | vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0} |
6844 | # | |
6845 | # | |
04e14b44 | 6846 | #" |
0a32b282 | 6847 | [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*") |
d1c8b778 | 6848 | (set (attr "type") |
0a32b282 | 6849 | (cond [(eq_attr "alternative" "0,1,7,8,9") |
d1c8b778 | 6850 | (const_string "sselog") |
0a32b282 | 6851 | (eq_attr "alternative" "11") |
d1c8b778 | 6852 | (const_string "imov") |
0a32b282 | 6853 | (eq_attr "alternative" "12") |
2c9cbc56 | 6854 | (const_string "fmov") |
d1c8b778 | 6855 | ] |
6856 | (const_string "ssemov"))) | |
0a32b282 | 6857 | (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*") |
6858 | (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*") | |
6859 | (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*") | |
6860 | (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")]) | |
2a466fea | 6861 | |
6862 | ;; A subset is vec_setv4sf. | |
;; Insert SF operand 2 into one element of V4SF operand 1 using insertps
;; (TARGET_SSE4_1).  Operand 3 is the vec_merge mask; the insn condition
;; accepts it only when exact_log2 of its value, viewed as unsigned, is below
;; GET_MODE_NUNITS (V4SFmode), i.e. when it is a single set bit naming a
;; valid element.  The output code replaces operand 3 with that bit index
;; shifted left by 4 and uses it as the instruction immediate.
;; Alternatives 0 and 1 are the two-operand non-AVX form (operand 1 tied to
;; the destination); alternative 2 is three-operand vinsertps.
6863 | (define_insn "*vec_setv4sf_sse4_1" | |
0a32b282 | 6864 | [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") |
2a466fea | 6865 | (vec_merge:V4SF |
6866 | (vec_duplicate:V4SF | |
0a32b282 | 6867 | (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm")) |
6868 | (match_operand:V4SF 1 "register_operand" "0,0,x") | |
abd4f58b | 6869 | (match_operand:SI 3 "const_int_operand")))] |
d3d379e7 | 6870 | "TARGET_SSE4_1 |
6871 | && ((unsigned) exact_log2 (INTVAL (operands[3])) | |
6872 | < GET_MODE_NUNITS (V4SFmode))" | |
8aa4e142 | 6873 | { |
2a466fea | 6874 | operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); |
45c0368c | 6875 | switch (which_alternative) |
6876 | { | |
6877 | case 0: | |
45c0368c | 6878 | case 1: |
0a32b282 | 6879 | return "insertps\t{%3, %2, %0|%0, %2, %3}"; |
6880 | case 2: | |
45c0368c | 6881 | return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
6882 | default: | |
6883 | gcc_unreachable (); | |
6884 | } | |
2a466fea | 6885 | } |
0a32b282 | 6886 | [(set_attr "isa" "noavx,noavx,avx") |
45c0368c | 6887 | (set_attr "type" "sselog") |
0a32b282 | 6888 | (set_attr "prefix_data16" "1,1,*") |
00a0e418 | 6889 | (set_attr "prefix_extra" "1") |
6890 | (set_attr "length_immediate" "1") | |
0a32b282 | 6891 | (set_attr "prefix" "orig,orig,vex") |
ed30e0a6 | 6892 | (set_attr "mode" "V4SF")]) |
6893 | ||
;; (v)insertps expressed as UNSPEC_INSERTPS with an 8-bit immediate (operand 3).
;; When operand 2 is in memory, the top two bits of the immediate
;; (operands[3] >> 6, "count_s") are folded into the address instead: the
;; memory reference is narrowed to SFmode at byte offset count_s * 4 and, if
;; count_s was nonzero, those bits are cleared from the immediate (& 0x3f).
;; Alternatives 0 and 1 print insertps; alternative 2 prints vinsertps.
2a466fea | 6894 | (define_insn "sse4_1_insertps" |
0a32b282 | 6895 | [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") |
6896 | (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
6897 | (match_operand:V4SF 1 "register_operand" "0,0,x") | |
6898 | (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] | |
2a466fea | 6899 | UNSPEC_INSERTPS))] |
6900 | "TARGET_SSE4_1" | |
908f63e8 | 6901 | { |
6902 | if (MEM_P (operands[2])) | |
6903 | { | |
6904 | unsigned count_s = INTVAL (operands[3]) >> 6; | |
6905 | if (count_s) | |
6906 | operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f); | |
6907 | operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4); | |
6908 | } | |
6909 | switch (which_alternative) | |
6910 | { | |
6911 | case 0: | |
908f63e8 | 6912 | case 1: |
0a32b282 | 6913 | return "insertps\t{%3, %2, %0|%0, %2, %3}"; |
6914 | case 2: | |
908f63e8 | 6915 | return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
6916 | default: | |
6917 | gcc_unreachable (); | |
6918 | } | |
6919 | } | |
0a32b282 | 6920 | [(set_attr "isa" "noavx,noavx,avx") |
45c0368c | 6921 | (set_attr "type" "sselog") |
0a32b282 | 6922 | (set_attr "prefix_data16" "1,1,*") |
2a466fea | 6923 | (set_attr "prefix_extra" "1") |
00a0e418 | 6924 | (set_attr "length_immediate" "1") |
0a32b282 | 6925 | (set_attr "prefix" "orig,orig,vex") |
2a466fea | 6926 | (set_attr "mode" "V4SF")]) |
8aa4e142 | 6927 | |
;; After reload, a vec_merge with selector (const_int 1) that places a
;; duplicated scalar (operand 1) into a memory vector (operand 0), taking all
;; other elements from that same memory, is replaced by a plain scalar store:
;; operand 0 is re-addressed in the scalar mode at offset 0.
2a466fea | 6928 | (define_split |
abd4f58b | 6929 | [(set (match_operand:VI4F_128 0 "memory_operand") |
6fe5844b | 6930 | (vec_merge:VI4F_128 |
6931 | (vec_duplicate:VI4F_128 | |
abd4f58b | 6932 | (match_operand:<ssescalarmode> 1 "nonmemory_operand")) |
2a466fea | 6933 | (match_dup 0) |
6934 | (const_int 1)))] | |
6935 | "TARGET_SSE && reload_completed" | |
823a2ddd | 6936 | [(set (match_dup 0) (match_dup 1))] |
6937 | "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);") | |
8aa4e142 | 6938 | |
;; Expander for setting one vector element: operand 0 is the vector register,
;; operand 1 the scalar value, operand 2 a constant integer.  All the work is
;; delegated to ix86_expand_vector_set, which is passed false, the vector, the
;; scalar and INTVAL (operands[2]).
77d27e3a | 6939 | (define_expand "vec_set<mode>" |
abd4f58b | 6940 | [(match_operand:V 0 "register_operand") |
6941 | (match_operand:<ssescalarmode> 1 "register_operand") | |
6942 | (match_operand 2 "const_int_operand")] | |
2a466fea | 6943 | "TARGET_SSE" |
8aa4e142 | 6944 | { |
2a466fea | 6945 | ix86_expand_vector_set (false, operands[0], operands[1], |
6946 | INTVAL (operands[2])); | |
6947 | DONE; | |
8aa4e142 | 6948 | }) |
6949 | ||
;; Extract element 0 of a V4SF value.  The insn itself always emits "#" and is
;; split after reload into a plain SF move: a register source is rewritten as
;; the SFmode register with the same register number, a memory source is
;; re-addressed in SFmode at offset 0.  Memory-to-memory is rejected by the
;; insn condition.
2a466fea | 6950 | (define_insn_and_split "*vec_extractv4sf_0" |
6be36710 | 6951 | [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r") |
2a466fea | 6952 | (vec_select:SF |
6be36710 | 6953 | (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m") |
2a466fea | 6954 | (parallel [(const_int 0)])))] |
6955 | "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
6956 | "#" | |
6957 | "&& reload_completed" | |
573c5512 | 6958 | [(set (match_dup 0) (match_dup 1))] |
bb8107e7 | 6959 | { |
573c5512 | 6960 | if (REG_P (operands[1])) |
6961 | operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1])); | |
2a466fea | 6962 | else |
573c5512 | 6963 | operands[1] = adjust_address (operands[1], SFmode, 0); |
bb8107e7 | 6964 | }) |
6965 | ||
;; Extract element N (operand 2, 0..3) of V4SF operand 1 into an SF
;; destination.  Alternatives 0 and 1 print %vextractps directly.
;; Alternatives 2 and 3 print "#" and are split after reload when the
;; destination is an SSE register; the split writes the destination viewed as
;; a V4SF register:
;;   index 1 or 3: sse_shufps_v4sf of operand 1 with itself, using selectors
;;                 (N, N, N + 4, N + 4);
;;   index 2:      vec_interleave_highv4sf of operand 1 with itself;
;;   index 0:      not expected here (see the comment in the default case).
9525a1bb | 6966 | (define_insn_and_split "*sse4_1_extractps" |
0a32b282 | 6967 | [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x") |
9525a1bb | 6968 | (vec_select:SF |
0a32b282 | 6969 | (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x") |
6970 | (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))] | |
9525a1bb | 6971 | "TARGET_SSE4_1" |
6972 | "@ | |
0a32b282 | 6973 | %vextractps\t{%2, %1, %0|%0, %1, %2} |
9525a1bb | 6974 | %vextractps\t{%2, %1, %0|%0, %1, %2} |
6975 | # | |
6976 | #" | |
6977 | "&& reload_completed && SSE_REG_P (operands[0])" | |
6978 | [(const_int 0)] | |
6979 | { | |
6980 | rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0])); | |
6981 | switch (INTVAL (operands[2])) | |
6982 | { | |
6983 | case 1: | |
6984 | case 3: | |
6985 | emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1], | |
6986 | operands[2], operands[2], | |
6987 | GEN_INT (INTVAL (operands[2]) + 4), | |
6988 | GEN_INT (INTVAL (operands[2]) + 4))); | |
6989 | break; | |
6990 | case 2: | |
6991 | emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1])); | |
6992 | break; | |
6993 | default: | |
6994 | /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */ | |
6995 | gcc_unreachable (); | |
6996 | } | |
6997 | DONE; | |
6998 | } | |
0a32b282 | 6999 | [(set_attr "isa" "*,*,noavx,avx") |
7000 | (set_attr "type" "sselog,sselog,*,*") | |
7001 | (set_attr "prefix_data16" "1,1,*,*") | |
7002 | (set_attr "prefix_extra" "1,1,*,*") | |
7003 | (set_attr "length_immediate" "1,1,*,*") | |
7004 | (set_attr "prefix" "maybe_vex,maybe_vex,*,*") | |
7005 | (set_attr "mode" "V4SF,V4SF,*,*")]) | |
9525a1bb | 7006 | |
;; Extract element N (operand 2, 0..3) of a V4SF value that lives in memory.
;; Always emitted as "#" and split after reload into a plain SF load from the
;; same memory re-addressed in SFmode at byte offset N * 4.
fe4df2ce | 7007 | (define_insn_and_split "*vec_extractv4sf_mem" |
9525a1bb | 7008 | [(set (match_operand:SF 0 "register_operand" "=x,*r,f") |
fe4df2ce | 7009 | (vec_select:SF |
7010 | (match_operand:V4SF 1 "memory_operand" "o,o,o") | |
7011 | (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))] | |
9525a1bb | 7012 | "TARGET_SSE" |
7013 | "#" | |
7014 | "&& reload_completed" | |
823a2ddd | 7015 | [(set (match_dup 0) (match_dup 1))] |
9525a1bb | 7016 | { |
823a2ddd | 7017 | operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4); |
9525a1bb | 7018 | }) |
7019 | ||
;; Name prefix used for the quarter-vector extract expander below, per
;; 512-bit mode.
fd1fee28 | 7020 | (define_mode_attr extract_type |
7021 | [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")]) | |
7022 | ||
;; Element-size/count suffix used in the quarter-vector extract expander name.
7023 | (define_mode_attr extract_suf | |
7024 | [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")]) | |
7025 | ||
;; 512-bit modes handled by the quarter-vector extract expander; the V8DF and
;; V8DI entries are only enabled under TARGET_AVX512DQ.
7026 | (define_mode_iterator AVX512_VEC | |
7027 | [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI]) | |
7028 | ||
;; Masked extraction of one quarter of a 512-bit vector.  Operand 2 (0..3)
;; selects the quarter, operand 3 is the merge source and operand 4 the mask
;; register.  The quarter number is expanded into explicit element indices:
;; four consecutive indices starting at mask * 4 for V16SI/V16SF, otherwise
;; two consecutive indices starting at mask * 2.  A memory destination
;; combined with a CONST_VECTOR merge source is first passed through
;; force_reg.
7029 | (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask" | |
5220cab6 | 7030 | [(match_operand:<ssequartermode> 0 "nonimmediate_operand") |
fd1fee28 | 7031 | (match_operand:AVX512_VEC 1 "register_operand") |
5220cab6 | 7032 | (match_operand:SI 2 "const_0_to_3_operand") |
7033 | (match_operand:<ssequartermode> 3 "nonimmediate_operand") | |
7034 | (match_operand:QI 4 "register_operand")] | |
7035 | "TARGET_AVX512F" | |
7036 | { | |
fd1fee28 | 7037 | int mask; |
7038 | mask = INTVAL (operands[2]); | |
7039 | ||
5220cab6 | 7040 | if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR) |
7041 | operands[0] = force_reg (<ssequartermode>mode, operands[0]); | |
fd1fee28 | 7042 | |
7043 | if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode) | |
7044 | emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0], | |
7045 | operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1), | |
7046 | GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3], | |
7047 | operands[4])); | |
7048 | else | |
7049 | emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0], | |
7050 | operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3], | |
7051 | operands[4])); | |
5220cab6 | 7052 | DONE; |
7053 | }) | |
7054 | ||
;; Masked store of one 128-bit pair of 64-bit elements of a 512-bit vector to
;; memory, merging with the previous memory contents (operand 4 must be the
;; same memory as operand 0).  Operands 2 and 3 are the two selected element
;; indices; they must be consecutive and start on an even index, and the
;; instruction immediate is operand 2 divided by two.
;; The mask operand uses the "Yk" constraint, as the sibling *_maskm patterns
;; in this file do, so that a mask register which cannot serve as a write
;; mask (k0) is never allocated for it.
fd1fee28 | 7055 | (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm" |
7056 | [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m") | |
7057 | (vec_merge:<ssequartermode> | |
7058 | (vec_select:<ssequartermode> | |
7059 | (match_operand:V8FI 1 "register_operand" "v") | |
7060 | (parallel [(match_operand 2 "const_0_to_7_operand") | |
7061 | (match_operand 3 "const_0_to_7_operand")])) | |
7062 | (match_operand:<ssequartermode> 4 "memory_operand" "0") | |
7063 | (match_operand:QI 5 "register_operand" "Yk")))] | |
7064 | "TARGET_AVX512DQ | |
7065 | && (INTVAL (operands[2]) % 2 == 0) | |
648b0c25 | 7066 | && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1) |
7067 | && rtx_equal_p (operands[4], operands[0])" | |
fd1fee28 | 7068 | { |
7069 | operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1); | |
7070 | return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}"; | |
7071 | } | |
7072 | [(set_attr "type" "sselog") | |
7073 | (set_attr "prefix_extra" "1") | |
7074 | (set_attr "length_immediate" "1") | |
7075 | (set_attr "memory" "store") | |
7076 | (set_attr "prefix" "evex") | |
7077 | (set_attr "mode" "<sseinsnmode>")]) | |
7078 | ||
;; Masked store of one 128-bit group of four 32-bit elements of a 512-bit
;; vector to memory, merging with the previous memory contents (operand 6
;; must be the same memory as operand 0).  Operands 2..5 are the selected
;; element indices; the condition requires them to be consecutive and to
;; start on a multiple of four.  The instruction immediate is operand 2
;; divided by four.
5220cab6 | 7079 | (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm" |
7080 | [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m") | |
7081 | (vec_merge:<ssequartermode> | |
7082 | (vec_select:<ssequartermode> | |
7083 | (match_operand:V16FI 1 "register_operand" "v") | |
7084 | (parallel [(match_operand 2 "const_0_to_15_operand") | |
7085 | (match_operand 3 "const_0_to_15_operand") | |
7086 | (match_operand 4 "const_0_to_15_operand") | |
7087 | (match_operand 5 "const_0_to_15_operand")])) | |
7088 | (match_operand:<ssequartermode> 6 "memory_operand" "0") | |
a31e7f46 | 7089 | (match_operand:QI 7 "register_operand" "Yk")))] |
d948b265 | 7090 | "TARGET_AVX512F |
fd1fee28 | 7091 | && ((INTVAL (operands[2]) % 4 == 0) |
7092 | && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1) | |
d948b265 | 7093 | && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) |
648b0c25 | 7094 | && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1)) |
7095 | && rtx_equal_p (operands[6], operands[0])" | |
5220cab6 | 7096 | { |
7097 | operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2); | |
7098 | return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}"; | |
7099 | } | |
7100 | [(set_attr "type" "sselog") | |
7101 | (set_attr "prefix_extra" "1") | |
7102 | (set_attr "length_immediate" "1") | |
7103 | (set_attr "memory" "store") | |
7104 | (set_attr "prefix" "evex") | |
7105 | (set_attr "mode" "<sseinsnmode>")]) | |
7106 | ||
;; Extract one 128-bit pair of 64-bit elements from a 512-bit vector
;; (optionally masked via <mask_name>).  Operands 2 and 3 are the selected
;; element indices.  The instruction can only address aligned pairs and the
;; immediate is computed as operand 2 >> 1, so the condition requires the
;; indices to be consecutive AND to start on an even index, matching the
;; *_maskm variant of this pattern.  Without the evenness test a selection
;; such as (1, 2) would match and be emitted with immediate 0, i.e. as
;; elements (0, 1).
fd1fee28 | 7107 | (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>" |
7108 | [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
7109 | (vec_select:<ssequartermode> | |
7110 | (match_operand:V8FI 1 "register_operand" "v") | |
7111 | (parallel [(match_operand 2 "const_0_to_7_operand") | |
7112 | (match_operand 3 "const_0_to_7_operand")])))] | |
7113 | "TARGET_AVX512DQ && (INTVAL (operands[2]) % 2 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)" | |
7114 | { | |
7115 | operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1); | |
7116 | return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"; | |
7117 | } | |
7118 | [(set_attr "type" "sselog1") | |
7119 | (set_attr "prefix_extra" "1") | |
7120 | (set_attr "length_immediate" "1") | |
7121 | (set_attr "prefix" "evex") | |
7122 | (set_attr "mode" "<sseinsnmode>")]) | |
7123 | ||
;; Extract one 128-bit group of four 32-bit elements from a 512-bit vector
;; (optionally masked via <mask_name>).  Operands 2..5 are the selected
;; element indices.  The immediate is computed as operand 2 >> 2, so the
;; condition requires the indices to be consecutive AND to start on a
;; multiple of four, matching the *_maskm variant of this pattern.  Without
;; the alignment test a selection such as (1, 2, 3, 4) would match and be
;; emitted with immediate 0, i.e. as elements (0, 1, 2, 3).
5220cab6 | 7124 | (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>" |
7125 | [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
8e9989b0 | 7126 | (vec_select:<ssequartermode> |
7127 | (match_operand:V16FI 1 "register_operand" "v") | |
7128 | (parallel [(match_operand 2 "const_0_to_15_operand") | |
7129 | (match_operand 3 "const_0_to_15_operand") | |
7130 | (match_operand 4 "const_0_to_15_operand") | |
7131 | (match_operand 5 "const_0_to_15_operand")])))] | |
d948b265 | 7132 | "TARGET_AVX512F |
7133 | && (INTVAL (operands[2]) % 4 == 0 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1) | |
7134 | && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
7135 | && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))" | |
8e9989b0 | 7136 | { |
7137 | operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2); | |
5220cab6 | 7138 | return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
8e9989b0 | 7139 | } |
fd1fee28 | 7140 | [(set_attr "type" "sselog1") |
8e9989b0 | 7141 | (set_attr "prefix_extra" "1") |
7142 | (set_attr "length_immediate" "1") | |
8e9989b0 | 7143 | (set_attr "prefix" "evex") |
7144 | (set_attr "mode" "<sseinsnmode>")]) | |
7145 | ||
;; Name prefix used for the half-vector extract expander below, per 512-bit
;; mode.
fd1fee28 | 7146 | (define_mode_attr extract_type_2 |
7147 | [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")]) | |
7148 | ||
;; Element-size/count suffix used in the half-vector extract expander name.
7149 | (define_mode_attr extract_suf_2 | |
7150 | [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")]) | |
7151 | ||
;; 512-bit modes handled by the half-vector extract expander; the V16SF and
;; V16SI entries are only enabled under TARGET_AVX512DQ.
7152 | (define_mode_iterator AVX512_VEC_2 | |
7153 | [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI]) | |
7154 | ||
;; Masked extraction of one half of a 512-bit vector.  Operand 2 (0 or 1)
;; selects the low or high half and dispatches to the matching
;; vec_extract_{lo,hi}_<mode>_mask generator; operand 3 is the merge source
;; and operand 4 the mask register.
;; Operand 0 is declared in <ssehalfvecmode>, so the force_reg applied to it
;; must use <ssehalfvecmode>mode as well (as avx512vl_vextractf128<mode> does);
;; the quarter-vector mode used previously did not match the operand's mode.
7155 | (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask" | |
5220cab6 | 7156 | [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") |
fd1fee28 | 7157 | (match_operand:AVX512_VEC_2 1 "register_operand") |
5220cab6 | 7158 | (match_operand:SI 2 "const_0_to_1_operand") |
7159 | (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand") | |
7160 | (match_operand:QI 4 "register_operand")] | |
7161 | "TARGET_AVX512F" | |
7162 | { | |
7163 | rtx (*insn)(rtx, rtx, rtx, rtx); | |
7164 | ||
7165 | if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR) | |
7166 | operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]); | |
7167 | ||
7168 | switch (INTVAL (operands[2])) | |
7169 | { | |
7170 | case 0: | |
7171 | insn = gen_vec_extract_lo_<mode>_mask; | |
7172 | break; | |
7173 | case 1: | |
7174 | insn = gen_vec_extract_hi_<mode>_mask; | |
7175 | break; | |
7176 | default: | |
7177 | gcc_unreachable (); | |
7178 | } | |
7179 | ||
7180 | emit_insn (insn (operands[0], operands[1], operands[3], operands[4])); | |
7181 | DONE; | |
7182 | }) | |
7183 | ||
;; After reload, extracting elements 0..3 (the low half) of a V8FI vector is
;; turned into a plain half-width move: a register source is rewritten as the
;; half-mode register with the same register number, a memory source goes
;; through gen_lowpart.  The split only applies when TARGET_AVX512VL is set,
;; or when the destination is a register and the source is not an
;; EXT_REX_SSE register; memory-to-memory is excluded.
8e9989b0 | 7184 | (define_split |
7185 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
7186 | (vec_select:<ssehalfvecmode> | |
7187 | (match_operand:V8FI 1 "nonimmediate_operand") | |
7188 | (parallel [(const_int 0) (const_int 1) | |
7189 | (const_int 2) (const_int 3)])))] | |
7190 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1])) | |
e6007a65 | 7191 | && reload_completed |
7192 | && (TARGET_AVX512VL || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))" | |
8e9989b0 | 7193 | [(const_int 0)] |
7194 | { | |
7195 | rtx op1 = operands[1]; | |
7196 | if (REG_P (op1)) | |
7197 | op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); | |
7198 | else | |
7199 | op1 = gen_lowpart (<ssehalfvecmode>mode, op1); | |
7200 | emit_move_insn (operands[0], op1); | |
7201 | DONE; | |
7202 | }) | |
7203 | ||
;; Masked store of the low half (elements 0..3) of a V8FI vector to memory,
;; merging with the previous memory contents: operand 2 must be the same
;; memory as operand 0.  Printed as vextract<shuffletype>64x4 with
;; immediate 0 and operand 3 as the write mask.
5220cab6 | 7204 | (define_insn "vec_extract_lo_<mode>_maskm" |
7205 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
7206 | (vec_merge:<ssehalfvecmode> | |
7207 | (vec_select:<ssehalfvecmode> | |
7208 | (match_operand:V8FI 1 "register_operand" "v") | |
7209 | (parallel [(const_int 0) (const_int 1) | |
7210 | (const_int 2) (const_int 3)])) | |
7211 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
a31e7f46 | 7212 | (match_operand:QI 3 "register_operand" "Yk")))] |
648b0c25 | 7213 | "TARGET_AVX512F |
7214 | && rtx_equal_p (operands[2], operands[0])" | |
fd1fee28 | 7215 | "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}" |
7216 | [(set_attr "type" "sselog1") | |
5220cab6 | 7217 | (set_attr "prefix_extra" "1") |
7218 | (set_attr "length_immediate" "1") | |
7219 | (set_attr "prefix" "evex") | |
7220 | (set_attr "mode" "<sseinsnmode>")]) | |
7221 | ||
;; Extract the low half (elements 0..3) of a V8FI vector, optionally masked
;; via <mask_name>.  When a mask is applied, or when TARGET_AVX512VL is not
;; available, the vextract<shuffletype>64x4 instruction with immediate 0 is
;; printed; otherwise "#" is returned so that the insn gets split later.
;; Memory-to-memory is rejected by the insn condition.
7222 | (define_insn "vec_extract_lo_<mode><mask_name>" | |
d442e138 | 7223 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v") |
8e9989b0 | 7224 | (vec_select:<ssehalfvecmode> |
d442e138 | 7225 | (match_operand:V8FI 1 "nonimmediate_operand" "v,m") |
8e9989b0 | 7226 | (parallel [(const_int 0) (const_int 1) |
7227 | (const_int 2) (const_int 3)])))] | |
7228 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
5220cab6 | 7229 | { |
e6007a65 | 7230 | if (<mask_applied> || !TARGET_AVX512VL) |
5220cab6 | 7231 | return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; |
7232 | else | |
7233 | return "#"; | |
7234 | } | |
fd1fee28 | 7235 | [(set_attr "type" "sselog1") |
8e9989b0 | 7236 | (set_attr "prefix_extra" "1") |
7237 | (set_attr "length_immediate" "1") | |
8e9989b0 | 7238 | (set_attr "prefix" "evex") |
7239 | (set_attr "mode" "<sseinsnmode>")]) | |
7240 | ||
;; Masked store of the high half (elements 4..7) of a V8FI vector to memory,
;; merging with the previous memory contents: operand 2 must be the same
;; memory as operand 0.  Printed as vextract<shuffletype>64x4 with
;; immediate 1 and operand 3 as the write mask.
5220cab6 | 7241 | (define_insn "vec_extract_hi_<mode>_maskm" |
7242 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
7243 | (vec_merge:<ssehalfvecmode> | |
7244 | (vec_select:<ssehalfvecmode> | |
7245 | (match_operand:V8FI 1 "register_operand" "v") | |
7246 | (parallel [(const_int 4) (const_int 5) | |
7247 | (const_int 6) (const_int 7)])) | |
7248 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
a31e7f46 | 7249 | (match_operand:QI 3 "register_operand" "Yk")))] |
648b0c25 | 7250 | "TARGET_AVX512F |
7251 | && rtx_equal_p (operands[2], operands[0])" | |
5220cab6 | 7252 | "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}" |
7253 | [(set_attr "type" "sselog") | |
7254 | (set_attr "prefix_extra" "1") | |
7255 | (set_attr "length_immediate" "1") | |
7256 | (set_attr "memory" "store") | |
7257 | (set_attr "prefix" "evex") | |
7258 | (set_attr "mode" "<sseinsnmode>")]) | |
7259 | ||
;; Extract the high half (elements 4..7) of a V8FI vector, optionally masked
;; via <mask_name>.  Always printed as vextract<shuffletype>64x4 with
;; immediate 1.
7260 | (define_insn "vec_extract_hi_<mode><mask_name>" | |
7261 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
8e9989b0 | 7262 | (vec_select:<ssehalfvecmode> |
7263 | (match_operand:V8FI 1 "register_operand" "v") | |
7264 | (parallel [(const_int 4) (const_int 5) | |
7265 | (const_int 6) (const_int 7)])))] | |
7266 | "TARGET_AVX512F" | |
5220cab6 | 7267 | "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}" |
fd1fee28 | 7268 | [(set_attr "type" "sselog1") |
7269 | (set_attr "prefix_extra" "1") | |
7270 | (set_attr "length_immediate" "1") | |
7271 | (set_attr "prefix" "evex") | |
7272 | (set_attr "mode" "<sseinsnmode>")]) | |
7273 | ||
;; Masked store of the high half (elements 8..15) of a V16FI vector to
;; memory, merging with the previous memory contents: operand 2 must be the
;; same memory as operand 0.  Printed as vextract<shuffletype>32x8 with
;; immediate 1 and operand 3 as the write mask.
;; The mask operand uses the "Yk" constraint, as the V8FI *_maskm patterns in
;; this file do, so that a mask register which cannot serve as a write mask
;; (k0) is never allocated for it.
7274 | (define_insn "vec_extract_hi_<mode>_maskm" | |
7275 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
7276 | (vec_merge:<ssehalfvecmode> | |
7277 | (vec_select:<ssehalfvecmode> | |
7278 | (match_operand:V16FI 1 "register_operand" "v") | |
7279 | (parallel [(const_int 8) (const_int 9) | |
7280 | (const_int 10) (const_int 11) | |
7281 | (const_int 12) (const_int 13) | |
7282 | (const_int 14) (const_int 15)])) | |
7283 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
7284 | (match_operand:QI 3 "register_operand" "Yk")))] | |
648b0c25 | 7285 | "TARGET_AVX512DQ |
7286 | && rtx_equal_p (operands[2], operands[0])" | |
fd1fee28 | 7287 | "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}" |
7288 | [(set_attr "type" "sselog1") | |
8e9989b0 | 7289 | (set_attr "prefix_extra" "1") |
7290 | (set_attr "length_immediate" "1") | |
8e9989b0 | 7291 | (set_attr "prefix" "evex") |
7292 | (set_attr "mode" "<sseinsnmode>")]) | |
7293 | ||
;; Extract the high half (elements 8..15) of a V16FI vector, optionally
;; masked via <mask_name>.  Two alternatives selected by the "isa" attribute:
;; with avx512dq the vextract<shuffletype>32x8 form (which carries the mask
;; operand) is printed, without it the unmasked vextracti64x4 form is used.
;; Both use immediate 1.
30874fa3 | 7294 | (define_insn "vec_extract_hi_<mode><mask_name>" |
7295 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm") | |
7296 | (vec_select:<ssehalfvecmode> | |
7297 | (match_operand:V16FI 1 "register_operand" "v,v") | |
7298 | (parallel [(const_int 8) (const_int 9) | |
7299 | (const_int 10) (const_int 11) | |
7300 | (const_int 12) (const_int 13) | |
7301 | (const_int 14) (const_int 15)])))] | |
fd1fee28 | 7302 | "TARGET_AVX512F && <mask_avx512dq_condition>" |
30874fa3 | 7303 | "@ |
7304 | vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1} | |
7305 | vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}" | |
7306 | [(set_attr "type" "sselog1") | |
7307 | (set_attr "prefix_extra" "1") | |
7308 | (set_attr "isa" "avx512dq,noavx512dq") | |
7309 | (set_attr "length_immediate" "1") | |
7310 | (set_attr "prefix" "evex") | |
7311 | (set_attr "mode" "<sseinsnmode>")]) | |
7312 | ||
;; Masked extraction of one 128-bit half of a 256-bit vector.  Operand 2
;; (0 or 1) selects the low or high half and dispatches to the matching
;; vec_extract_{lo,hi}_<mode>_mask generator; operand 3 is the merge source
;; and operand 4 the mask register.  A memory destination combined with a
;; CONST_VECTOR merge source is first passed through force_reg.
fd1fee28 | 7313 | (define_expand "avx512vl_vextractf128<mode>" |
7314 | [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
7315 | (match_operand:VI48F_256 1 "register_operand") | |
7316 | (match_operand:SI 2 "const_0_to_1_operand") | |
7317 | (match_operand:<ssehalfvecmode> 3 "vector_move_operand") | |
7318 | (match_operand:QI 4 "register_operand")] | |
7319 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
7320 | { | |
7321 | rtx (*insn)(rtx, rtx, rtx, rtx); | |
7322 | ||
7323 | if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR) | |
7324 | operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]); | |
7325 | ||
7326 | switch (INTVAL (operands[2])) | |
7327 | { | |
7328 | case 0: | |
7329 | insn = gen_vec_extract_lo_<mode>_mask; | |
7330 | break; | |
7331 | case 1: | |
7332 | insn = gen_vec_extract_hi_<mode>_mask; | |
7333 | break; | |
7334 | default: | |
7335 | gcc_unreachable (); | |
7336 | } | |
7337 | ||
7338 | emit_insn (insn (operands[0], operands[1], operands[3], operands[4])); | |
7339 | DONE; | |
7340 | }) | |
7341 | ||
;; Unmasked extraction of one 128-bit half of a 256-bit vector.  Operand 2
;; (0 or 1) selects the low or high half and dispatches to
;; vec_extract_lo_<mode> or vec_extract_hi_<mode>.
ed30e0a6 | 7342 | (define_expand "avx_vextractf128<mode>" |
abd4f58b | 7343 | [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") |
7344 | (match_operand:V_256 1 "register_operand") | |
7345 | (match_operand:SI 2 "const_0_to_1_operand")] | |
ed30e0a6 | 7346 | "TARGET_AVX" |
7347 | { | |
c3fa352f | 7348 | rtx (*insn)(rtx, rtx); |
7349 | ||
ed30e0a6 | 7350 | switch (INTVAL (operands[2])) |
7351 | { | |
7352 | case 0: | |
c3fa352f | 7353 | insn = gen_vec_extract_lo_<mode>; |
ed30e0a6 | 7354 | break; |
7355 | case 1: | |
c3fa352f | 7356 | insn = gen_vec_extract_hi_<mode>; |
ed30e0a6 | 7357 | break; |
7358 | default: | |
7359 | gcc_unreachable (); | |
7360 | } | |
c3fa352f | 7361 | |
7362 | emit_insn (insn (operands[0], operands[1])); | |
ed30e0a6 | 7363 | DONE; |
7364 | }) | |
7365 | ||
;; Extract the low half (elements 0..7) of a V16FI vector, optionally masked
;; via <mask_name>.  With a mask applied the vextract<shuffletype>32x8
;; instruction with immediate 0 is printed; without one "#" is returned so
;; that the insn gets split later.  Memory-to-memory is rejected by the insn
;; condition.  No insn attributes are set on this pattern.
fd1fee28 | 7366 | (define_insn "vec_extract_lo_<mode><mask_name>" |
8e9989b0 | 7367 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m") |
7368 | (vec_select:<ssehalfvecmode> | |
7369 | (match_operand:V16FI 1 "nonimmediate_operand" "vm,v") | |
7370 | (parallel [(const_int 0) (const_int 1) | |
7371 | (const_int 2) (const_int 3) | |
7372 | (const_int 4) (const_int 5) | |
7373 | (const_int 6) (const_int 7)])))] | |
fd1fee28 | 7374 | "TARGET_AVX512F |
7375 | && <mask_mode512bit_condition> | |
7376 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8e9989b0 | 7377 | { |
fd1fee28 | 7378 | if (<mask_applied>) |
7379 | return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; | |
7380 | else | |
7381 | return "#"; | |
7382 | }) | |
7383 | ||
;; After reload, extracting elements 0..7 (the low half) of a V16FI vector is
;; turned into a plain half-width move: a register source is rewritten as the
;; half-mode register with the same register number, a memory source goes
;; through gen_lowpart.  Memory-to-memory is excluded.
7384 | (define_split | |
7385 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
7386 | (vec_select:<ssehalfvecmode> | |
7387 | (match_operand:V16FI 1 "nonimmediate_operand") | |
7388 | (parallel [(const_int 0) (const_int 1) | |
7389 | (const_int 2) (const_int 3) | |
7390 | (const_int 4) (const_int 5) | |
7391 | (const_int 6) (const_int 7)])))] | |
7392 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1])) | |
7393 | && reload_completed" | |
7394 | [(const_int 0)] | |
7395 | { | |
8e9989b0 | 7396 | rtx op1 = operands[1]; |
7397 | if (REG_P (op1)) | |
7398 | op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); | |
7399 | else | |
7400 | op1 = gen_lowpart (<ssehalfvecmode>mode, op1); | |
7401 | emit_move_insn (operands[0], op1); | |
7402 | DONE; | |
7403 | }) | |
7404 | ||
;; Extract the low half (elements 0..1) of a VI8F_256 vector, optionally
;; masked via <mask_name>.  With a mask applied the vextract<shuffletype>64x2
;; instruction with immediate 0 is printed; without one "#" is returned so
;; that the insn gets split later.  Memory-to-memory is rejected by the insn
;; condition.
;; The masked template prints the destination decoration through
;; <mask_operand2>, exactly as the other <mask_name> extract patterns in this
;; file do, instead of hard-coding "%{%3%}".  The hard-coded form printed only
;; the mask register and dropped the rest of what the substitution attribute
;; supplies (NOTE(review): in subst.md that is the zero-masking marker, so a
;; zero-masked extract would have been emitted as a merge-masked one).
fd1fee28 | 7405 | (define_insn "vec_extract_lo_<mode><mask_name>" |
7406 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m") | |
63d5e521 | 7407 | (vec_select:<ssehalfvecmode> |
fd1fee28 | 7408 | (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v") |
ed30e0a6 | 7409 | (parallel [(const_int 0) (const_int 1)])))] |
fd1fee28 | 7410 | "TARGET_AVX |
7411 | && <mask_avx512vl_condition> && <mask_avx512dq_condition> | |
7412 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
3d1a0207 | 7413 | { |
fd1fee28 | 7414 | if (<mask_applied>) |
7415 | return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; | |
3d1a0207 | 7416 | else |
fd1fee28 | 7417 | return "#"; |
7418 | } | |
7419 | [(set_attr "type" "sselog") | |
7420 | (set_attr "prefix_extra" "1") | |
7421 | (set_attr "length_immediate" "1") | |
7422 | (set_attr "memory" "none,store") | |
7423 | (set_attr "prefix" "evex") | |
7424 | (set_attr "mode" "XI")]) | |
7425 | ||
;; After reload, extracting elements 0..1 (the low half) of a VI8F_256 vector
;; is turned into a plain half-width move: a register source is rewritten as
;; the half-mode register with the same register number, a memory source goes
;; through gen_lowpart.  Memory-to-memory is excluded.
7426 | (define_split | |
7427 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
7428 | (vec_select:<ssehalfvecmode> | |
7429 | (match_operand:VI8F_256 1 "nonimmediate_operand") | |
7430 | (parallel [(const_int 0) (const_int 1)])))] | |
7431 | "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) | |
7432 | && reload_completed" | |
7433 | [(const_int 0)] | |
7434 | { | |
7435 | rtx op1 = operands[1]; | |
7436 | if (REG_P (op1)) | |
7437 | op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); | |
7438 | else | |
7439 | op1 = gen_lowpart (<ssehalfvecmode>mode, op1); | |
7440 | emit_move_insn (operands[0], op1); | |
7441 | DONE; | |
3d1a0207 | 7442 | }) |
ed30e0a6 | 7443 | |
;; Extract the high half (elements 2..3) of a VI8F_256 vector, optionally
;; masked via <mask_name>.  The mnemonic is chosen at output time:
;;   TARGET_AVX512VL and TARGET_AVX512DQ: vextract<shuffletype>64x2 (this is
;;                                        the only form that prints the mask
;;                                        operand);
;;   TARGET_AVX512VL only:                vextract<shuffletype>32x4;
;;   otherwise:                           vextract<i128>.
;; All three use immediate 1.
fd1fee28 | 7444 | (define_insn "vec_extract_hi_<mode><mask_name>" |
7445 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>") | |
63d5e521 | 7446 | (vec_select:<ssehalfvecmode> |
fd1fee28 | 7447 | (match_operand:VI8F_256 1 "register_operand" "v,v") |
ed30e0a6 | 7448 | (parallel [(const_int 2) (const_int 3)])))] |
9fbb8dbd | 7449 | "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>" |
fd1fee28 | 7450 | { |
9fbb8dbd | 7451 | if (TARGET_AVX512VL) |
7452 | { | |
7453 | if (TARGET_AVX512DQ) | |
7454 | return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"; | |
7455 | else | |
7456 | return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"; | |
7457 | } | |
fd1fee28 | 7458 | else |
7459 | return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"; | |
7460 | } | |
ed30e0a6 | 7461 | [(set_attr "type" "sselog") |
00a0e418 | 7462 | (set_attr "prefix_extra" "1") |
7463 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 7464 | (set_attr "memory" "none,store") |
7465 | (set_attr "prefix" "vex") | |
154d1782 | 7466 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 7467 | |
;; After reload, extracting elements 0..3 (the low half) of a VI4F_256 vector
;; is turned into a plain half-width move: a register source is rewritten as
;; the half-mode register with the same register number, a memory source goes
;; through gen_lowpart.  Memory-to-memory is excluded.
fd1fee28 | 7468 | (define_split |
7469 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
63d5e521 | 7470 | (vec_select:<ssehalfvecmode> |
fd1fee28 | 7471 | (match_operand:VI4F_256 1 "nonimmediate_operand") |
ed30e0a6 | 7472 | (parallel [(const_int 0) (const_int 1) |
7473 | (const_int 2) (const_int 3)])))] | |
fd1fee28 | 7474 | "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed" |
7475 | [(const_int 0)] | |
3d1a0207 | 7476 | { |
fd1fee28 | 7477 | rtx op1 = operands[1]; |
7478 | if (REG_P (op1)) | |
7479 | op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); | |
3d1a0207 | 7480 | else |
fd1fee28 | 7481 | op1 = gen_lowpart (<ssehalfvecmode>mode, op1); |
7482 | emit_move_insn (operands[0], op1); | |
7483 | DONE; | |
3d1a0207 | 7484 | }) |
ed30e0a6 | 7485 | |
fd1fee28 | 7486 | |
;; Extract the low half (elements 0..3) of a VI4F_256 vector held in a
;; register, optionally masked via <mask_name>.  With a mask applied the
;; vextract<shuffletype>32x4 instruction with immediate 0 is printed; without
;; one "#" is returned so that the insn gets split later.
7487 | (define_insn "vec_extract_lo_<mode><mask_name>" | |
7488 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
7489 | (vec_select:<ssehalfvecmode> | |
0a281fd0 | 7490 | (match_operand:VI4F_256 1 "register_operand" "v") |
fd1fee28 | 7491 | (parallel [(const_int 0) (const_int 1) |
7492 | (const_int 2) (const_int 3)])))] | |
7493 | "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>" | |
7494 | { | |
7495 | if (<mask_applied>) | |
7496 | return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; | |
7497 | else | |
7498 | return "#"; | |
7499 | } | |
7500 | [(set_attr "type" "sselog1") | |
7501 | (set_attr "prefix_extra" "1") | |
7502 | (set_attr "length_immediate" "1") | |
7503 | (set_attr "prefix" "evex") | |
7504 | (set_attr "mode" "<sseinsnmode>")]) | |
7505 | ||
;; Masked store of the low half (elements 0..3) of a VI4F_256 vector to
;; memory, merging with the previous memory contents: operand 2 must be the
;; same memory as operand 0.  Printed as vextract<shuffletype>32x4 with
;; immediate 0 and operand 3 as the write mask.
;; The mask operand uses the "Yk" constraint, as vec_extract_hi_<mode>_mask
;; for the same modes does, so that a mask register which cannot serve as a
;; write mask (k0) is never allocated for it.
7506 | (define_insn "vec_extract_lo_<mode>_maskm" | |
7507 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
7508 | (vec_merge:<ssehalfvecmode> | |
7509 | (vec_select:<ssehalfvecmode> | |
7510 | (match_operand:VI4F_256 1 "register_operand" "v") | |
7511 | (parallel [(const_int 0) (const_int 1) | |
7512 | (const_int 2) (const_int 3)])) | |
7513 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
7514 | (match_operand:QI 3 "register_operand" "Yk")))] | |
648b0c25 | 7515 | "TARGET_AVX512VL && TARGET_AVX512F |
7516 | && rtx_equal_p (operands[2], operands[0])" | |
7517 | "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}" | |
fd1fee28 | 7518 | [(set_attr "type" "sselog1") |
7519 | (set_attr "prefix_extra" "1") | |
7520 | (set_attr "length_immediate" "1") | |
7521 | (set_attr "prefix" "evex") | |
7522 | (set_attr "mode" "<sseinsnmode>")]) | |
7523 | ||
;; Masked store of the high half (elements 4..7) of a VI4F_256 vector to
;; memory, merging with the previous memory contents: operand 2 must be the
;; same memory as operand 0.  Printed as vextract<shuffletype>32x4 with
;; immediate 1 and operand 3 as the write mask.
;; Operand 3 is the vec_merge selector, i.e. a mask register, so it is
;; declared with <avx512fmaskmode> and the "Yk" constraint exactly like
;; vec_extract_hi_<mode>_mask for the same modes.  It was previously declared
;; with the vector mode <ssehalfvecmode>, which does not describe a mask
;; register and disagrees with the QI mask of vec_extract_lo_<mode>_maskm.
7524 | (define_insn "vec_extract_hi_<mode>_maskm" | |
7525 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
7526 | (vec_merge:<ssehalfvecmode> | |
7527 | (vec_select:<ssehalfvecmode> | |
7528 | (match_operand:VI4F_256 1 "register_operand" "v") | |
7529 | (parallel [(const_int 4) (const_int 5) | |
7530 | (const_int 6) (const_int 7)])) | |
7531 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
7532 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] | |
648b0c25 | 7533 | "TARGET_AVX512F && TARGET_AVX512VL |
7534 | && rtx_equal_p (operands[2], operands[0])" | |
7535 | "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}" | |
fd1fee28 | 7536 | [(set_attr "type" "sselog1") |
fd1fee28 | 7537 | (set_attr "length_immediate" "1") |
7538 | (set_attr "prefix" "evex") | |
7539 | (set_attr "mode" "<sseinsnmode>")]) | |
7540 | ||
;; Masked extraction of the high half (elements 4..7) of a VI4F_256 vector
;; into a register.  Operand 2 is the merge source (constraint "0C": either
;; tied to the destination or matching the "C" constraint) and operand 3 the
;; mask register.  Printed as vextract<shuffletype>32x4 with immediate 1; the
;; %N2 modifier is appended after the mask.
4d1088aa | 7541 | (define_insn "vec_extract_hi_<mode>_mask" |
7542 | [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v") | |
7543 | (vec_merge:<ssehalfvecmode> | |
7544 | (vec_select:<ssehalfvecmode> | |
7545 | (match_operand:VI4F_256 1 "register_operand" "v") | |
7546 | (parallel [(const_int 4) (const_int 5) | |
7547 | (const_int 6) (const_int 7)])) | |
7548 | (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C") | |
7549 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] | |
7550 | "TARGET_AVX512VL" | |
7551 | "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}" | |
7552 | [(set_attr "type" "sselog1") | |
7553 | (set_attr "length_immediate" "1") | |
7554 | (set_attr "prefix" "evex") | |
7555 | (set_attr "mode" "<sseinsnmode>")]) | |
7556 | ||
;; Unmasked extraction of the high half (elements 4..7) of a VI4F_256 vector.
;; Alternative 0 prints vextract<i128> (VEX prefix); alternative 1 is
;; restricted to avx512vl by the "isa" attribute and prints
;; vextract<shuffletype>32x4 (EVEX prefix).  Both use immediate 1.
7557 | (define_insn "vec_extract_hi_<mode>" | |
7558 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm") | |
63d5e521 | 7559 | (vec_select:<ssehalfvecmode> |
4d1088aa | 7560 | (match_operand:VI4F_256 1 "register_operand" "x, v") |
ed30e0a6 | 7561 | (parallel [(const_int 4) (const_int 5) |
7562 | (const_int 6) (const_int 7)])))] | |
4d1088aa | 7563 | "TARGET_AVX" |
7564 | "@ | |
7565 | vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1} | |
7566 | vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}" | |
7567 | [(set_attr "isa" "*, avx512vl") | |
7568 | (set_attr "prefix" "vex, evex") | |
7569 | (set_attr "type" "sselog1") | |
00a0e418 | 7570 | (set_attr "length_immediate" "1") |
154d1782 | 7571 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 7572 | |
;; Extract the low half (elements 0..15) of a V32HI vector.  Always emitted
;; as "#" and split after reload into a plain V16HI move: a register source
;; is rewritten as the V16HImode register with the same register number, a
;; memory source is re-addressed in V16HImode at offset 0.  Memory-to-memory
;; is rejected by the insn condition.
697a43f8 | 7573 | (define_insn_and_split "vec_extract_lo_v32hi" |
7574 | [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m") | |
7575 | (vec_select:V16HI | |
7576 | (match_operand:V32HI 1 "nonimmediate_operand" "vm,v") | |
7577 | (parallel [(const_int 0) (const_int 1) | |
7578 | (const_int 2) (const_int 3) | |
7579 | (const_int 4) (const_int 5) | |
7580 | (const_int 6) (const_int 7) | |
7581 | (const_int 8) (const_int 9) | |
7582 | (const_int 10) (const_int 11) | |
7583 | (const_int 12) (const_int 13) | |
7584 | (const_int 14) (const_int 15)])))] | |
7585 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
7586 | "#" | |
7587 | "&& reload_completed" | |
7588 | [(set (match_dup 0) (match_dup 1))] | |
7589 | { | |
7590 | if (REG_P (operands[1])) | |
7591 | operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1])); | |
7592 | else | |
7593 | operands[1] = adjust_address (operands[1], V16HImode, 0); | |
7594 | }) | |
7595 | ||
;; Extract the high half (elements 16..31) of a V32HI register as V16HI,
;; into a register (alternative 0) or memory (alternative 1), using
;; vextracti64x4 with immediate 1.
7596 | (define_insn "vec_extract_hi_v32hi" | |
7597 | [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m") | |
7598 | (vec_select:V16HI | |
0a281fd0 | 7599 | (match_operand:V32HI 1 "register_operand" "v,v") |
697a43f8 | 7600 | (parallel [(const_int 16) (const_int 17) |
7601 | (const_int 18) (const_int 19) | |
7602 | (const_int 20) (const_int 21) | |
7603 | (const_int 22) (const_int 23) | |
7604 | (const_int 24) (const_int 25) | |
7605 | (const_int 26) (const_int 27) | |
7606 | (const_int 28) (const_int 29) | |
7607 | (const_int 30) (const_int 31)])))] | |
7608 | "TARGET_AVX512F" | |
7609 | "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}" | |
7610 | [(set_attr "type" "sselog") | |
7611 | (set_attr "prefix_extra" "1") | |
7612 | (set_attr "length_immediate" "1") | |
7613 | (set_attr "memory" "none,store") | |
7614 | (set_attr "prefix" "evex") | |
7615 | (set_attr "mode" "XI")]) | |
7616 | ||
;; Extract the low half (elements 0..7) of a V16HI value as V8HI.  The insn
;; condition rejects the case where both operands are memory.  It is output
;; as "#" and, after reload, split into a plain move whose source is
;; operand 1 re-expressed in V8HImode (same register number, or same
;; address at offset 0).
3d1a0207 | 7617 | (define_insn_and_split "vec_extract_lo_v16hi" |
ed30e0a6 | 7618 | [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") |
7619 | (vec_select:V8HI | |
3d1a0207 | 7620 | (match_operand:V16HI 1 "nonimmediate_operand" "xm,x") |
ed30e0a6 | 7621 | (parallel [(const_int 0) (const_int 1) |
7622 | (const_int 2) (const_int 3) | |
7623 | (const_int 4) (const_int 5) | |
7624 | (const_int 6) (const_int 7)])))] | |
a3d5479a | 7625 | "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
3d1a0207 | 7626 | "#" |
7627 | "&& reload_completed" | |
573c5512 | 7628 | [(set (match_dup 0) (match_dup 1))] |
3d1a0207 | 7629 | { |
573c5512 | 7630 | if (REG_P (operands[1])) |
7631 | operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1])); | |
3d1a0207 | 7632 | else |
573c5512 | 7633 | operands[1] = adjust_address (operands[1], V8HImode, 0); |
3d1a0207 | 7634 | }) |
ed30e0a6 | 7635 | |
;; Extract the high half (elements 8..15) of a V16HI register as V8HI, into
;; a register (alternative 0) or memory (alternative 1), using the
;; VEX-prefixed vextract%~128 with immediate 1.
7636 | (define_insn "vec_extract_hi_v16hi" | |
7637 | [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") | |
7638 | (vec_select:V8HI | |
7639 | (match_operand:V16HI 1 "register_operand" "x,x") | |
7640 | (parallel [(const_int 8) (const_int 9) | |
7641 | (const_int 10) (const_int 11) | |
7642 | (const_int 12) (const_int 13) | |
7643 | (const_int 14) (const_int 15)])))] | |
7644 | "TARGET_AVX" | |
154d1782 | 7645 | "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" |
ed30e0a6 | 7646 | [(set_attr "type" "sselog") |
00a0e418 | 7647 | (set_attr "prefix_extra" "1") |
7648 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 7649 | (set_attr "memory" "none,store") |
7650 | (set_attr "prefix" "vex") | |
154d1782 | 7651 | (set_attr "mode" "OI")]) |
ed30e0a6 | 7652 | |
697a43f8 | 7653 | (define_insn_and_split "vec_extract_lo_v64qi" |
7654 | [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") | |
7655 | (vec_select:V32QI | |
7656 | (match_operand:V64QI 1 "nonimmediate_operand" "vm,v") | |
7657 | (parallel [(const_int 0) (const_int 1) | |
7658 | (const_int 2) (const_int 3) | |
7659 | (const_int 4) (const_int 5) | |
7660 | (const_int 6) (const_int 7) | |
7661 | (const_int 8) (const_int 9) | |
7662 | (const_int 10) (const_int 11) | |
7663 | (const_int 12) (const_int 13) | |
7664 | (const_int 14) (const_int 15) | |
7665 | (const_int 16) (const_int 17) | |
7666 | (const_int 18) (const_int 19) | |
7667 | (const_int 20) (const_int 21) | |
7668 | (const_int 22) (const_int 23) | |
7669 | (const_int 24) (const_int 25) | |
7670 | (const_int 26) (const_int 27) | |
7671 | (const_int 28) (const_int 29) | |
7672 | (const_int 30) (const_int 31)])))] | |
7673 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
7674 | "#" | |
7675 | "&& reload_completed" | |
7676 | [(set (match_dup 0) (match_dup 1))] | |
7677 | { | |
7678 | if (REG_P (operands[1])) | |
7679 | operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1])); | |
7680 | else | |
7681 | operands[1] = adjust_address (operands[1], V32QImode, 0); | |
7682 | }) | |
7683 | ||
7684 | (define_insn "vec_extract_hi_v64qi" | |
7685 | [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") | |
7686 | (vec_select:V32QI | |
0a281fd0 | 7687 | (match_operand:V64QI 1 "register_operand" "v,v") |
697a43f8 | 7688 | (parallel [(const_int 32) (const_int 33) |
7689 | (const_int 34) (const_int 35) | |
7690 | (const_int 36) (const_int 37) | |
7691 | (const_int 38) (const_int 39) | |
7692 | (const_int 40) (const_int 41) | |
7693 | (const_int 42) (const_int 43) | |
7694 | (const_int 44) (const_int 45) | |
7695 | (const_int 46) (const_int 47) | |
7696 | (const_int 48) (const_int 49) | |
7697 | (const_int 50) (const_int 51) | |
7698 | (const_int 52) (const_int 53) | |
7699 | (const_int 54) (const_int 55) | |
7700 | (const_int 56) (const_int 57) | |
7701 | (const_int 58) (const_int 59) | |
7702 | (const_int 60) (const_int 61) | |
7703 | (const_int 62) (const_int 63)])))] | |
7704 | "TARGET_AVX512F" | |
7705 | "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}" | |
7706 | [(set_attr "type" "sselog") | |
7707 | (set_attr "prefix_extra" "1") | |
7708 | (set_attr "length_immediate" "1") | |
7709 | (set_attr "memory" "none,store") | |
7710 | (set_attr "prefix" "evex") | |
7711 | (set_attr "mode" "XI")]) | |
7712 | ||
3d1a0207 | 7713 | (define_insn_and_split "vec_extract_lo_v32qi" |
ed30e0a6 | 7714 | [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") |
7715 | (vec_select:V16QI | |
3d1a0207 | 7716 | (match_operand:V32QI 1 "nonimmediate_operand" "xm,x") |
ed30e0a6 | 7717 | (parallel [(const_int 0) (const_int 1) |
7718 | (const_int 2) (const_int 3) | |
7719 | (const_int 4) (const_int 5) | |
7720 | (const_int 6) (const_int 7) | |
7721 | (const_int 8) (const_int 9) | |
7722 | (const_int 10) (const_int 11) | |
7723 | (const_int 12) (const_int 13) | |
7724 | (const_int 14) (const_int 15)])))] | |
a3d5479a | 7725 | "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
3d1a0207 | 7726 | "#" |
7727 | "&& reload_completed" | |
573c5512 | 7728 | [(set (match_dup 0) (match_dup 1))] |
3d1a0207 | 7729 | { |
573c5512 | 7730 | if (REG_P (operands[1])) |
7731 | operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1])); | |
3d1a0207 | 7732 | else |
573c5512 | 7733 | operands[1] = adjust_address (operands[1], V16QImode, 0); |
3d1a0207 | 7734 | }) |
ed30e0a6 | 7735 | |
7736 | (define_insn "vec_extract_hi_v32qi" | |
7737 | [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") | |
7738 | (vec_select:V16QI | |
7739 | (match_operand:V32QI 1 "register_operand" "x,x") | |
7740 | (parallel [(const_int 16) (const_int 17) | |
7741 | (const_int 18) (const_int 19) | |
7742 | (const_int 20) (const_int 21) | |
7743 | (const_int 22) (const_int 23) | |
7744 | (const_int 24) (const_int 25) | |
7745 | (const_int 26) (const_int 27) | |
7746 | (const_int 28) (const_int 29) | |
7747 | (const_int 30) (const_int 31)])))] | |
7748 | "TARGET_AVX" | |
154d1782 | 7749 | "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" |
ed30e0a6 | 7750 | [(set_attr "type" "sselog") |
00a0e418 | 7751 | (set_attr "prefix_extra" "1") |
7752 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 7753 | (set_attr "memory" "none,store") |
7754 | (set_attr "prefix" "vex") | |
154d1782 | 7755 | (set_attr "mode" "OI")]) |
ed30e0a6 | 7756 | |
6fe5844b | 7757 | ;; Modes handled by vec_extract patterns. |
;; V64QI and V32HI are enabled under TARGET_AVX512BW, the other 16- and
;; 8-element SI/DI/SF/DF modes (V16SI, V8DI, V16SF, V8DF) under
;; TARGET_AVX512F, and V32QI, V16HI, V8SI, V4DI, V8SF, V4DF under TARGET_AVX.
;; V16QI, V8HI, V4SI, V2DI, V4SF and V2DF carry no extra condition.
7758 | (define_mode_iterator VEC_EXTRACT_MODE | |
fd1fee28 | 7759 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI |
7760 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI | |
697a43f8 | 7761 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI |
7762 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI | |
7763 | (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF | |
7764 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) | |
6fe5844b | 7765 | |
;; Expander for vec_extract<mode> over VEC_EXTRACT_MODE.  Operand 0 is a
;; <ssescalarmode> register, operand 1 a vector register and operand 2 a
;; constant integer.  No RTL template is emitted: the operands (with the
;; integer value of operand 2) are handed to ix86_expand_vector_extract,
;; whose first argument is passed as false, and the expander ends with DONE.
3ce6ef72 | 7766 | (define_expand "vec_extract<mode>" |
abd4f58b | 7767 | [(match_operand:<ssescalarmode> 0 "register_operand") |
7768 | (match_operand:VEC_EXTRACT_MODE 1 "register_operand") | |
7769 | (match_operand 2 "const_int_operand")] | |
2a466fea | 7770 | "TARGET_SSE" |
887b0069 | 7771 | { |
2a466fea | 7772 | ix86_expand_vector_extract (false, operands[0], operands[1], |
7773 | INTVAL (operands[2])); | |
887b0069 | 7774 | DONE; |
7775 | }) | |
7776 | ||
5802c0cb | 7777 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
7778 | ;; | |
7779 | ;; Parallel double-precision floating point element swizzling | |
7780 | ;; | |
7781 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
7782 | ||
;; vunpckhpd on V8DF.  With operands 1 and 2 concatenated into a V16DF, the
;; result takes elements 1,9, 3,11, 5,13, 7,15: the odd-numbered elements of
;; operand 1 and operand 2, paired up alternately.
5220cab6 | 7783 | (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>" |
697a43f8 | 7784 | [(set (match_operand:V8DF 0 "register_operand" "=v") |
7785 | (vec_select:V8DF | |
7786 | (vec_concat:V16DF | |
0a281fd0 | 7787 | (match_operand:V8DF 1 "register_operand" "v") |
697a43f8 | 7788 | (match_operand:V8DF 2 "nonimmediate_operand" "vm")) |
7789 | (parallel [(const_int 1) (const_int 9) | |
7790 | (const_int 3) (const_int 11) | |
7791 | (const_int 5) (const_int 13) | |
7792 | (const_int 7) (const_int 15)])))] | |
7793 | "TARGET_AVX512F" | |
5220cab6 | 7794 | "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 7795 | [(set_attr "type" "sselog") |
7796 | (set_attr "prefix" "evex") | |
7797 | (set_attr "mode" "V4DF")]) | |
7798 | ||
d6e05290 | 7799 | ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
fd4e0ec4 | 7800 | (define_insn "avx_unpckhpd256<mask_name>" |
7801 | [(set (match_operand:V4DF 0 "register_operand" "=v") | |
ed30e0a6 | 7802 | (vec_select:V4DF |
7803 | (vec_concat:V8DF | |
fd4e0ec4 | 7804 | (match_operand:V4DF 1 "register_operand" "v") |
7805 | (match_operand:V4DF 2 "nonimmediate_operand" "vm")) | |
5e56456b | 7806 | (parallel [(const_int 1) (const_int 5) |
ed30e0a6 | 7807 | (const_int 3) (const_int 7)])))] |
fd4e0ec4 | 7808 | "TARGET_AVX && <mask_avx512vl_condition>" |
7809 | "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
ed30e0a6 | 7810 | [(set_attr "type" "sselog") |
7811 | (set_attr "prefix" "vex") | |
7812 | (set_attr "mode" "V4DF")]) | |
7813 | ||
;; Interleave the high elements of two V4DF vectors.  Writing a = operand 1
;; and b = operand 2, the expansion is three vec_selects:
;;   temp3 = elements {0,4,2,6} of a:b  = {a0, b0, a2, b2}
;;   temp4 = elements {1,5,3,7} of a:b  = {a1, b1, a3, b3}
;;   op0   = elements {2,3,6,7} of temp3:temp4 = {a2, b2, a3, b3}
;; Operands 3 and 4 are fresh V4DF pseudos created in the preparation code.
;; The operands carry no constraint strings: constraints are not used by
;; define_expand, matching the other expanders in this file.
8cedf886 | 7814 | (define_expand "vec_interleave_highv4df" |
7815 | [(set (match_dup 3) | |
7816 | (vec_select:V4DF | |
7817 | (vec_concat:V8DF | |
7818 | (match_operand:V4DF 1 "register_operand") | |
7819 | (match_operand:V4DF 2 "nonimmediate_operand")) | |
7820 | (parallel [(const_int 0) (const_int 4) | |
7821 | (const_int 2) (const_int 6)]))) | |
7822 | (set (match_dup 4) | |
7823 | (vec_select:V4DF | |
7824 | (vec_concat:V8DF | |
7825 | (match_dup 1) | |
7826 | (match_dup 2)) | |
7827 | (parallel [(const_int 1) (const_int 5) | |
7828 | (const_int 3) (const_int 7)]))) | |
abd4f58b | 7829 | (set (match_operand:V4DF 0 "register_operand") |
33d0986a | 7830 | (vec_select:V4DF |
7831 | (vec_concat:V8DF | |
8cedf886 | 7832 | (match_dup 3) |
33d0986a | 7833 | (match_dup 4)) |
7834 | (parallel [(const_int 2) (const_int 3) | |
7835 | (const_int 6) (const_int 7)])))] | |
8cedf886 | 7836 | "TARGET_AVX" |
7837 | { | |
7838 | operands[3] = gen_reg_rtx (V4DFmode); | |
7839 | operands[4] = gen_reg_rtx (V4DFmode); | |
7840 | }) | |
7841 | ||
7842 | ||
fd4e0ec4 | 7843 | (define_insn "avx512vl_unpckhpd128_mask" |
7844 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
7845 | (vec_merge:V2DF | |
7846 | (vec_select:V2DF | |
7847 | (vec_concat:V4DF | |
7848 | (match_operand:V2DF 1 "register_operand" "v") | |
7849 | (match_operand:V2DF 2 "nonimmediate_operand" "vm")) | |
7850 | (parallel [(const_int 1) (const_int 3)])) | |
7851 | (match_operand:V2DF 3 "vector_move_operand" "0C") | |
7852 | (match_operand:QI 4 "register_operand" "Yk")))] | |
7853 | "TARGET_AVX512VL" | |
7854 | "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
7855 | [(set_attr "type" "sselog") | |
7856 | (set_attr "prefix" "evex") | |
7857 | (set_attr "mode" "V2DF")]) | |
7858 | ||
;; Expander for the V2DF high interleave: elements 1 and 3 of operands 1 and
;; 2 concatenated.  Both inputs may be memory; if
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the
;; combination, operand 2 is forced into a register before the template is
;; emitted.
d6e05290 | 7859 | (define_expand "vec_interleave_highv2df" |
abd4f58b | 7860 | [(set (match_operand:V2DF 0 "register_operand") |
7c839b3f | 7861 | (vec_select:V2DF |
7862 | (vec_concat:V4DF | |
abd4f58b | 7863 | (match_operand:V2DF 1 "nonimmediate_operand") |
7864 | (match_operand:V2DF 2 "nonimmediate_operand")) | |
7c839b3f | 7865 | (parallel [(const_int 1) |
7866 | (const_int 3)])))] | |
7867 | "TARGET_SSE2" | |
04e14b44 | 7868 | { |
7869 | if (!ix86_vec_interleave_v2df_operator_ok (operands, 1)) | |
7870 | operands[2] = force_reg (V2DFmode, operands[2]); | |
7871 | }) | |
7c839b3f | 7872 | |
01624f90 | 7873 | (define_insn "*vec_interleave_highv2df" |
f30b3ad6 | 7874 | [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m") |
04e14b44 | 7875 | (vec_select:V2DF |
7876 | (vec_concat:V4DF | |
f30b3ad6 | 7877 | (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x") |
7878 | (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0")) | |
04e14b44 | 7879 | (parallel [(const_int 1) |
7880 | (const_int 3)])))] | |
01624f90 | 7881 | "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" |
04e14b44 | 7882 | "@ |
7883 | unpckhpd\t{%2, %0|%0, %2} | |
45c0368c | 7884 | vunpckhpd\t{%2, %1, %0|%0, %1, %2} |
7885 | %vmovddup\t{%H1, %0|%0, %H1} | |
04e14b44 | 7886 | movlpd\t{%H1, %0|%0, %H1} |
45c0368c | 7887 | vmovlpd\t{%H1, %2, %0|%0, %2, %H1} |
c358a059 | 7888 | %vmovhpd\t{%1, %0|%q0, %1}" |
f30b3ad6 | 7889 | [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") |
8c1dfa94 | 7890 | (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") |
7891 | (set_attr "ssememalign" "64") | |
45c0368c | 7892 | (set_attr "prefix_data16" "*,*,*,1,*,1") |
7893 | (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") | |
889d67a8 | 7894 | (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) |
ed30e0a6 | 7895 | |
5220cab6 | 7896 | (define_expand "avx512f_movddup512<mask_name>" |
697a43f8 | 7897 | [(set (match_operand:V8DF 0 "register_operand") |
7898 | (vec_select:V8DF | |
7899 | (vec_concat:V16DF | |
7900 | (match_operand:V8DF 1 "nonimmediate_operand") | |
7901 | (match_dup 1)) | |
7902 | (parallel [(const_int 0) (const_int 8) | |
7903 | (const_int 2) (const_int 10) | |
7904 | (const_int 4) (const_int 12) | |
7905 | (const_int 6) (const_int 14)])))] | |
7906 | "TARGET_AVX512F") | |
7907 | ||
5220cab6 | 7908 | (define_expand "avx512f_unpcklpd512<mask_name>" |
697a43f8 | 7909 | [(set (match_operand:V8DF 0 "register_operand") |
7910 | (vec_select:V8DF | |
7911 | (vec_concat:V16DF | |
7912 | (match_operand:V8DF 1 "register_operand") | |
7913 | (match_operand:V8DF 2 "nonimmediate_operand")) | |
7914 | (parallel [(const_int 0) (const_int 8) | |
7915 | (const_int 2) (const_int 10) | |
7916 | (const_int 4) (const_int 12) | |
7917 | (const_int 6) (const_int 14)])))] | |
7918 | "TARGET_AVX512F") | |
7919 | ||
5220cab6 | 7920 | (define_insn "*avx512f_unpcklpd512<mask_name>" |
697a43f8 | 7921 | [(set (match_operand:V8DF 0 "register_operand" "=v,v") |
7922 | (vec_select:V8DF | |
7923 | (vec_concat:V16DF | |
7924 | (match_operand:V8DF 1 "nonimmediate_operand" "vm, v") | |
7925 | (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm")) | |
7926 | (parallel [(const_int 0) (const_int 8) | |
7927 | (const_int 2) (const_int 10) | |
7928 | (const_int 4) (const_int 12) | |
7929 | (const_int 6) (const_int 14)])))] | |
7930 | "TARGET_AVX512F" | |
7931 | "@ | |
5220cab6 | 7932 | vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1} |
7933 | vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
697a43f8 | 7934 | [(set_attr "type" "sselog") |
7935 | (set_attr "prefix" "evex") | |
7936 | (set_attr "mode" "V8DF")]) | |
7937 | ||
04e14b44 | 7938 | ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
fd4e0ec4 | 7939 | (define_expand "avx_movddup256<mask_name>" |
abd4f58b | 7940 | [(set (match_operand:V4DF 0 "register_operand") |
ed30e0a6 | 7941 | (vec_select:V4DF |
7942 | (vec_concat:V8DF | |
abd4f58b | 7943 | (match_operand:V4DF 1 "nonimmediate_operand") |
ed30e0a6 | 7944 | (match_dup 1)) |
04e14b44 | 7945 | (parallel [(const_int 0) (const_int 4) |
7946 | (const_int 2) (const_int 6)])))] | |
fd4e0ec4 | 7947 | "TARGET_AVX && <mask_avx512vl_condition>") |
5802c0cb | 7948 | |
fd4e0ec4 | 7949 | (define_expand "avx_unpcklpd256<mask_name>" |
abd4f58b | 7950 | [(set (match_operand:V4DF 0 "register_operand") |
ed30e0a6 | 7951 | (vec_select:V4DF |
7952 | (vec_concat:V8DF | |
abd4f58b | 7953 | (match_operand:V4DF 1 "register_operand") |
7954 | (match_operand:V4DF 2 "nonimmediate_operand")) | |
ed30e0a6 | 7955 | (parallel [(const_int 0) (const_int 4) |
5e56456b | 7956 | (const_int 2) (const_int 6)])))] |
fd4e0ec4 | 7957 | "TARGET_AVX && <mask_avx512vl_condition>") |
04e14b44 | 7958 | |
fd4e0ec4 | 7959 | (define_insn "*avx_unpcklpd256<mask_name>" |
7960 | [(set (match_operand:V4DF 0 "register_operand" "=v,v") | |
04e14b44 | 7961 | (vec_select:V4DF |
7962 | (vec_concat:V8DF | |
fd4e0ec4 | 7963 | (match_operand:V4DF 1 "nonimmediate_operand" " v,m") |
7964 | (match_operand:V4DF 2 "nonimmediate_operand" "vm,1")) | |
04e14b44 | 7965 | (parallel [(const_int 0) (const_int 4) |
7966 | (const_int 2) (const_int 6)])))] | |
fd4e0ec4 | 7967 | "TARGET_AVX && <mask_avx512vl_condition>" |
04e14b44 | 7968 | "@ |
fd4e0ec4 | 7969 | vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} |
7970 | vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}" | |
ed30e0a6 | 7971 | [(set_attr "type" "sselog") |
7972 | (set_attr "prefix" "vex") | |
7973 | (set_attr "mode" "V4DF")]) | |
7974 | ||
;; Interleave the low elements of two V4DF vectors.  Writing a = operand 1
;; and b = operand 2, the expansion is three vec_selects:
;;   temp3 = elements {0,4,2,6} of a:b  = {a0, b0, a2, b2}
;;   temp4 = elements {1,5,3,7} of a:b  = {a1, b1, a3, b3}
;;   op0   = elements {0,1,4,5} of temp3:temp4 = {a0, b0, a1, b1}
;; Operands 3 and 4 are fresh V4DF pseudos created in the preparation code.
;; The operands carry no constraint strings: constraints are not used by
;; define_expand, matching the other expanders in this file.
8cedf886 | 7975 | (define_expand "vec_interleave_lowv4df" |
7976 | [(set (match_dup 3) | |
7977 | (vec_select:V4DF | |
7978 | (vec_concat:V8DF | |
7979 | (match_operand:V4DF 1 "register_operand") | |
7980 | (match_operand:V4DF 2 "nonimmediate_operand")) | |
7981 | (parallel [(const_int 0) (const_int 4) | |
7982 | (const_int 2) (const_int 6)]))) | |
7983 | (set (match_dup 4) | |
7984 | (vec_select:V4DF | |
7985 | (vec_concat:V8DF | |
7986 | (match_dup 1) | |
7987 | (match_dup 2)) | |
7988 | (parallel [(const_int 1) (const_int 5) | |
7989 | (const_int 3) (const_int 7)]))) | |
abd4f58b | 7990 | (set (match_operand:V4DF 0 "register_operand") |
33d0986a | 7991 | (vec_select:V4DF |
7992 | (vec_concat:V8DF | |
8cedf886 | 7993 | (match_dup 3) |
33d0986a | 7994 | (match_dup 4)) |
7995 | (parallel [(const_int 0) (const_int 1) | |
5deb404d | 7996 | (const_int 4) (const_int 5)])))] |
8cedf886 | 7997 | "TARGET_AVX" |
7998 | { | |
7999 | operands[3] = gen_reg_rtx (V4DFmode); | |
8000 | operands[4] = gen_reg_rtx (V4DFmode); | |
8001 | }) | |
8002 | ||
fd4e0ec4 | 8003 | (define_insn "avx512vl_unpcklpd128_mask" |
8004 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
8005 | (vec_merge:V2DF | |
8006 | (vec_select:V2DF | |
8007 | (vec_concat:V4DF | |
8008 | (match_operand:V2DF 1 "register_operand" "v") | |
8009 | (match_operand:V2DF 2 "nonimmediate_operand" "vm")) | |
8010 | (parallel [(const_int 0) (const_int 2)])) | |
8011 | (match_operand:V2DF 3 "vector_move_operand" "0C") | |
8012 | (match_operand:QI 4 "register_operand" "Yk")))] | |
8013 | "TARGET_AVX512VL" | |
8014 | "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
8015 | [(set_attr "type" "sselog") | |
8016 | (set_attr "prefix" "evex") | |
8017 | (set_attr "mode" "V2DF")]) | |
8018 | ||
;; Expander for the V2DF low interleave: elements 0 and 2 of operands 1 and
;; 2 concatenated.  Both inputs may be memory; if
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the
;; combination, operand 1 is forced into a register before the template is
;; emitted.
d6e05290 | 8019 | (define_expand "vec_interleave_lowv2df" |
abd4f58b | 8020 | [(set (match_operand:V2DF 0 "register_operand") |
7c839b3f | 8021 | (vec_select:V2DF |
8022 | (vec_concat:V4DF | |
abd4f58b | 8023 | (match_operand:V2DF 1 "nonimmediate_operand") |
8024 | (match_operand:V2DF 2 "nonimmediate_operand")) | |
7c839b3f | 8025 | (parallel [(const_int 0) |
8026 | (const_int 2)])))] | |
8027 | "TARGET_SSE2" | |
04e14b44 | 8028 | { |
8029 | if (!ix86_vec_interleave_v2df_operator_ok (operands, 0)) | |
8030 | operands[1] = force_reg (V2DFmode, operands[1]); | |
8031 | }) | |
7c839b3f | 8032 | |
01624f90 | 8033 | (define_insn "*vec_interleave_lowv2df" |
f30b3ad6 | 8034 | [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o") |
04e14b44 | 8035 | (vec_select:V2DF |
8036 | (vec_concat:V4DF | |
f30b3ad6 | 8037 | (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0") |
8038 | (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x")) | |
04e14b44 | 8039 | (parallel [(const_int 0) |
8040 | (const_int 2)])))] | |
01624f90 | 8041 | "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" |
04e14b44 | 8042 | "@ |
8043 | unpcklpd\t{%2, %0|%0, %2} | |
45c0368c | 8044 | vunpcklpd\t{%2, %1, %0|%0, %1, %2} |
c358a059 | 8045 | %vmovddup\t{%1, %0|%0, %q1} |
8046 | movhpd\t{%2, %0|%0, %q2} | |
8047 | vmovhpd\t{%2, %1, %0|%0, %1, %q2} | |
45c0368c | 8048 | %vmovlpd\t{%2, %H0|%H0, %2}" |
f30b3ad6 | 8049 | [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") |
45c0368c | 8050 | (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") |
8c1dfa94 | 8051 | (set_attr "ssememalign" "64") |
45c0368c | 8052 | (set_attr "prefix_data16" "*,*,*,1,*,1") |
8053 | (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") | |
889d67a8 | 8054 | (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) |
ed30e0a6 | 8055 | |
;; After reload, split a store to memory of a V2DF built from elements 0 and
;; 2 of register operand 1 concatenated with itself (its element 0 twice).
;; The replacement is two DFmode moves of the same register, viewed in
;; DFmode, to byte offsets 0 and 8 of the destination.
04e14b44 | 8056 | (define_split |
abd4f58b | 8057 | [(set (match_operand:V2DF 0 "memory_operand") |
04e14b44 | 8058 | (vec_select:V2DF |
8059 | (vec_concat:V4DF | |
abd4f58b | 8060 | (match_operand:V2DF 1 "register_operand") |
04e14b44 | 8061 | (match_dup 1)) |
8062 | (parallel [(const_int 0) | |
8063 | (const_int 2)])))] | |
8064 | "TARGET_SSE3 && reload_completed" | |
8065 | [(const_int 0)] | |
8066 | { | |
8067 | rtx low = gen_rtx_REG (DFmode, REGNO (operands[1])); | |
8068 | emit_move_insn (adjust_address (operands[0], DFmode, 0), low); | |
8069 | emit_move_insn (adjust_address (operands[0], DFmode, 8), low); | |
8070 | DONE; | |
8071 | }) | |
8072 | ||
;; Split a register load that selects elements N and N+2 (N is 0 or 1) of a
;; memory V2DF concatenated with itself, i.e. the same element twice.  The
;; replacement is a vec_duplicate of the DFmode memory at byte offset N*8.
8073 | (define_split | |
abd4f58b | 8074 | [(set (match_operand:V2DF 0 "register_operand") |
04e14b44 | 8075 | (vec_select:V2DF |
8076 | (vec_concat:V4DF | |
abd4f58b | 8077 | (match_operand:V2DF 1 "memory_operand") |
04e14b44 | 8078 | (match_dup 1)) |
abd4f58b | 8079 | (parallel [(match_operand:SI 2 "const_0_to_1_operand") |
8080 | (match_operand:SI 3 "const_int_operand")])))] | |
04e14b44 | 8081 | "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])" |
8082 | [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))] | |
8083 | { | |
8084 | operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8); | |
8085 | }) | |
8086 | ||
0b7cc9c6 | 8087 | (define_insn "avx512f_vmscalef<mode><round_name>" |
85065932 | 8088 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
8089 | (vec_merge:VF_128 | |
5220cab6 | 8090 | (unspec:VF_128 |
8091 | [(match_operand:VF_128 1 "register_operand" "v") | |
fbf4df62 | 8092 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")] |
5220cab6 | 8093 | UNSPEC_SCALEF) |
85065932 | 8094 | (match_dup 1) |
8095 | (const_int 1)))] | |
8096 | "TARGET_AVX512F" | |
f46a34a6 | 8097 | "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" |
85065932 | 8098 | [(set_attr "prefix" "evex") |
8099 | (set_attr "mode" "<ssescalarmode>")]) | |
8100 | ||
250533c0 | 8101 | (define_insn "<avx512>_scalef<mode><mask_name><round_name>" |
8102 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
8103 | (unspec:VF_AVX512VL | |
8104 | [(match_operand:VF_AVX512VL 1 "register_operand" "v") | |
8105 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")] | |
5220cab6 | 8106 | UNSPEC_SCALEF))] |
85065932 | 8107 | "TARGET_AVX512F" |
f46a34a6 | 8108 | "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" |
85065932 | 8109 | [(set_attr "prefix" "evex") |
8110 | (set_attr "mode" "<MODE>")]) | |
8111 | ||
;; Expander for the _maskz form of vternlog.  It emits no template of its
;; own: operands 0-4 are forwarded to <avx512>_vternlog<mode>_maskz_1, an
;; all-zero vector of the operand mode (CONST0_RTX) is inserted as the next
;; argument, and mask register operand 5 is passed last.
3d038641 | 8112 | (define_expand "<avx512>_vternlog<mode>_maskz" |
8113 | [(match_operand:VI48_AVX512VL 0 "register_operand") | |
8114 | (match_operand:VI48_AVX512VL 1 "register_operand") | |
8115 | (match_operand:VI48_AVX512VL 2 "register_operand") | |
8116 | (match_operand:VI48_AVX512VL 3 "nonimmediate_operand") | |
9a5ea1d5 | 8117 | (match_operand:SI 4 "const_0_to_255_operand") |
8118 | (match_operand:<avx512fmaskmode> 5 "register_operand")] | |
8119 | "TARGET_AVX512F" | |
8120 | { | |
3d038641 | 8121 | emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 ( |
9a5ea1d5 | 8122 | operands[0], operands[1], operands[2], operands[3], |
8123 | operands[4], CONST0_RTX (<MODE>mode), operands[5])); | |
8124 | DONE; | |
8125 | }) | |
8126 | ||
3d038641 | 8127 | (define_insn "<avx512>_vternlog<mode><sd_maskz_name>" |
8128 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
8129 | (unspec:VI48_AVX512VL | |
8130 | [(match_operand:VI48_AVX512VL 1 "register_operand" "0") | |
8131 | (match_operand:VI48_AVX512VL 2 "register_operand" "v") | |
8132 | (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm") | |
d2ff59d6 | 8133 | (match_operand:SI 4 "const_0_to_255_operand")] |
8134 | UNSPEC_VTERNLOG))] | |
8135 | "TARGET_AVX512F" | |
9a5ea1d5 | 8136 | "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}" |
d2ff59d6 | 8137 | [(set_attr "type" "sselog") |
8138 | (set_attr "prefix" "evex") | |
8139 | (set_attr "mode" "<sseinsnmode>")]) | |
8140 | ||
3d038641 | 8141 | (define_insn "<avx512>_vternlog<mode>_mask" |
8142 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
8143 | (vec_merge:VI48_AVX512VL | |
8144 | (unspec:VI48_AVX512VL | |
8145 | [(match_operand:VI48_AVX512VL 1 "register_operand" "0") | |
8146 | (match_operand:VI48_AVX512VL 2 "register_operand" "v") | |
8147 | (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm") | |
5220cab6 | 8148 | (match_operand:SI 4 "const_0_to_255_operand")] |
8149 | UNSPEC_VTERNLOG) | |
8150 | (match_dup 1) | |
a31e7f46 | 8151 | (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] |
5220cab6 | 8152 | "TARGET_AVX512F" |
8153 | "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}" | |
8154 | [(set_attr "type" "sselog") | |
8155 | (set_attr "prefix" "evex") | |
8156 | (set_attr "mode" "<sseinsnmode>")]) | |
8157 | ||
250533c0 | 8158 | (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>" |
8159 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
8160 | (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] | |
85065932 | 8161 | UNSPEC_GETEXP))] |
8162 | "TARGET_AVX512F" | |
dbfe84d5 | 8163 | "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"; |
85065932 | 8164 | [(set_attr "prefix" "evex") |
8165 | (set_attr "mode" "<MODE>")]) | |
8166 | ||
0b7cc9c6 | 8167 | (define_insn "avx512f_sgetexp<mode><round_saeonly_name>" |
85065932 | 8168 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
8169 | (vec_merge:VF_128 | |
5220cab6 | 8170 | (unspec:VF_128 |
8171 | [(match_operand:VF_128 1 "register_operand" "v") | |
fbf4df62 | 8172 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
5220cab6 | 8173 | UNSPEC_GETEXP) |
85065932 | 8174 | (match_dup 1) |
8175 | (const_int 1)))] | |
8176 | "TARGET_AVX512F" | |
0b7cc9c6 | 8177 | "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"; |
85065932 | 8178 | [(set_attr "prefix" "evex") |
8179 | (set_attr "mode" "<ssescalarmode>")]) | |
8180 | ||
3d038641 | 8181 | (define_insn "<mask_codefor><avx512>_align<mode><mask_name>" |
8182 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
8183 | (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v") | |
8184 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm") | |
8185 | (match_operand:SI 3 "const_0_to_255_operand")] | |
8186 | UNSPEC_ALIGN))] | |
d2ff59d6 | 8187 | "TARGET_AVX512F" |
5220cab6 | 8188 | "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"; |
d2ff59d6 | 8189 | [(set_attr "prefix" "evex") |
8190 | (set_attr "mode" "<sseinsnmode>")]) | |
8191 | ||
;; Expand the masked V16SF shufps from its 8-bit immediate (operand 3).
;; The immediate is read as four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7).
;; They are turned into sixteen explicit element indices for
;; avx512f_shufps512_1_mask, four per group of four result elements: the
;; first two selectors get the group base (0, 4, 8, 12) added, the last two
;; get the group base plus 16 (16, 20, 24, 28).
;; NOTE(review): the +16 presumably addresses operand 2 within the
;; concatenation of operands 1 and 2 -- confirm against the _1_mask pattern.
;; Operands 4 and 5 are passed through unchanged as the final arguments.
5220cab6 | 8192 | (define_expand "avx512f_shufps512_mask" |
8193 | [(match_operand:V16SF 0 "register_operand") | |
8194 | (match_operand:V16SF 1 "register_operand") | |
8195 | (match_operand:V16SF 2 "nonimmediate_operand") | |
8196 | (match_operand:SI 3 "const_0_to_255_operand") | |
8197 | (match_operand:V16SF 4 "register_operand") | |
8198 | (match_operand:HI 5 "register_operand")] | |
8199 | "TARGET_AVX512F" | |
8200 | { | |
8201 | int mask = INTVAL (operands[3]); | |
8202 | emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2], | |
8203 | GEN_INT ((mask >> 0) & 3), | |
8204 | GEN_INT ((mask >> 2) & 3), | |
8205 | GEN_INT (((mask >> 4) & 3) + 16), | |
8206 | GEN_INT (((mask >> 6) & 3) + 16), | |
8207 | GEN_INT (((mask >> 0) & 3) + 4), | |
8208 | GEN_INT (((mask >> 2) & 3) + 4), | |
8209 | GEN_INT (((mask >> 4) & 3) + 20), | |
8210 | GEN_INT (((mask >> 6) & 3) + 20), | |
8211 | GEN_INT (((mask >> 0) & 3) + 8), | |
8212 | GEN_INT (((mask >> 2) & 3) + 8), | |
8213 | GEN_INT (((mask >> 4) & 3) + 24), | |
8214 | GEN_INT (((mask >> 6) & 3) + 24), | |
8215 | GEN_INT (((mask >> 0) & 3) + 12), | |
8216 | GEN_INT (((mask >> 2) & 3) + 12), | |
8217 | GEN_INT (((mask >> 4) & 3) + 28), | |
8218 | GEN_INT (((mask >> 6) & 3) + 28), | |
8219 | operands[4], operands[5])); | |
8220 | DONE; | |
8221 | }) | |
8222 | ||
9a5ea1d5 | 8223 | |
;; Expander for the _maskz form of fixupimm.  It emits no template of its
;; own: operands 0-4 are forwarded to
;; <avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name>, followed by
;; an all-zero vector of the operand mode (CONST0_RTX), mask register
;; operand 5, and whatever <round_saeonly_expand_operand6> expands to.
250533c0 | 8224 | (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>" |
8225 | [(match_operand:VF_AVX512VL 0 "register_operand") | |
8226 | (match_operand:VF_AVX512VL 1 "register_operand") | |
8227 | (match_operand:VF_AVX512VL 2 "register_operand") | |
affa436a | 8228 | (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>") |
9a5ea1d5 | 8229 | (match_operand:SI 4 "const_0_to_255_operand") |
8230 | (match_operand:<avx512fmaskmode> 5 "register_operand")] | |
8231 | "TARGET_AVX512F" | |
8232 | { | |
250533c0 | 8233 | emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> ( |
9a5ea1d5 | 8234 | operands[0], operands[1], operands[2], operands[3], |
affa436a | 8235 | operands[4], CONST0_RTX (<MODE>mode), operands[5] |
8236 | <round_saeonly_expand_operand6>)); | |
9a5ea1d5 | 8237 | DONE; |
8238 | }) | |
8239 | ||
250533c0 | 8240 | (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>" |
8241 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
8242 | (unspec:VF_AVX512VL | |
8243 | [(match_operand:VF_AVX512VL 1 "register_operand" "0") | |
8244 | (match_operand:VF_AVX512VL 2 "register_operand" "v") | |
8245 | (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>") | |
85065932 | 8246 | (match_operand:SI 4 "const_0_to_255_operand")] |
8247 | UNSPEC_FIXUPIMM))] | |
8248 | "TARGET_AVX512F" | |
dbfe84d5 | 8249 | "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"; |
85065932 | 8250 | [(set_attr "prefix" "evex") |
8251 | (set_attr "mode" "<MODE>")]) | |
8252 | ||
250533c0 | 8253 | (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>" |
8254 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
8255 | (vec_merge:VF_AVX512VL | |
8256 | (unspec:VF_AVX512VL | |
8257 | [(match_operand:VF_AVX512VL 1 "register_operand" "0") | |
8258 | (match_operand:VF_AVX512VL 2 "register_operand" "v") | |
8259 | (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>") | |
5220cab6 | 8260 | (match_operand:SI 4 "const_0_to_255_operand")] |
8261 | UNSPEC_FIXUPIMM) | |
8262 | (match_dup 1) | |
a31e7f46 | 8263 | (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] |
5220cab6 | 8264 | "TARGET_AVX512F" |
dbfe84d5 | 8265 | "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"; |
5220cab6 | 8266 | [(set_attr "prefix" "evex") |
8267 | (set_attr "mode" "<MODE>")]) | |
8268 | ||
;; Zero-masking expander for the scalar (VF_128) fixupimm.  It only forwards
;; its operands to the corresponding _maskz_1 pattern: operands 0-4 first,
;; then CONST0_RTX (<MODE>mode), then the mask register operand 5, followed
;; by whatever <round_saeonly_expand_operand6> expands to.
affa436a | 8269 | (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>" |
9a5ea1d5 | 8270 | [(match_operand:VF_128 0 "register_operand") |
8271 | (match_operand:VF_128 1 "register_operand") | |
8272 | (match_operand:VF_128 2 "register_operand") | |
affa436a | 8273 | (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>") |
9a5ea1d5 | 8274 | (match_operand:SI 4 "const_0_to_255_operand") |
8275 | (match_operand:<avx512fmaskmode> 5 "register_operand")] | |
8276 | "TARGET_AVX512F" | |
8277 | { | |
affa436a | 8278 | emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> ( |
9a5ea1d5 | 8279 | operands[0], operands[1], operands[2], operands[3], |
affa436a | 8280 | operands[4], CONST0_RTX (<MODE>mode), operands[5] |
8281 | <round_saeonly_expand_operand6>)); | |
9a5ea1d5 | 8282 | DONE; |
8283 | }) | |
8284 | ||
;; Scalar vfixupimm over VF_128.  The UNSPEC_FIXUPIMM result is vec_merge'd
;; with operand 1 using the constant selector (const_int 1), so only
;; element 0 comes from the unspec and the rest is copied from operand 1.
;; The "mode" attribute is the scalar mode <ssescalarmode>.
dbfe84d5 | 8285 | (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>" |
85065932 | 8286 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
8287 | (vec_merge:VF_128 | |
8288 | (unspec:VF_128 | |
8289 | [(match_operand:VF_128 1 "register_operand" "0") | |
8290 | (match_operand:VF_128 2 "register_operand" "v") | |
dbfe84d5 | 8291 | (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
85065932 | 8292 | (match_operand:SI 4 "const_0_to_255_operand")] |
8293 | UNSPEC_FIXUPIMM) | |
8294 | (match_dup 1) | |
8295 | (const_int 1)))] | |
8296 | "TARGET_AVX512F" | |
dbfe84d5 | 8297 | "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"; |
85065932 | 8298 | [(set_attr "prefix" "evex") |
8299 | (set_attr "mode" "<ssescalarmode>")]) | |
8300 | ||
;; Merge-masked scalar vfixupimm.  Two nested vec_merges: the inner one
;; combines the UNSPEC_FIXUPIMM result with operand 1 under (const_int 1),
;; the outer one merges that with operand 1 again under the mask register
;; operand 5 (constraint "Yk"), printed as %{%5%} in the template.
dbfe84d5 | 8301 | (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>" |
5220cab6 | 8302 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
8303 | (vec_merge:VF_128 | |
8304 | (vec_merge:VF_128 | |
8305 | (unspec:VF_128 | |
8306 | [(match_operand:VF_128 1 "register_operand" "0") | |
8307 | (match_operand:VF_128 2 "register_operand" "v") | |
dbfe84d5 | 8308 | (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
5220cab6 | 8309 | (match_operand:SI 4 "const_0_to_255_operand")] |
8310 | UNSPEC_FIXUPIMM) | |
8311 | (match_dup 1) | |
8312 | (const_int 1)) | |
8313 | (match_dup 1) | |
a31e7f46 | 8314 | (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] |
5220cab6 | 8315 | "TARGET_AVX512F" |
dbfe84d5 | 8316 | "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"; |
5220cab6 | 8317 | [(set_attr "prefix" "evex") |
8318 | (set_attr "mode" "<ssescalarmode>")]) | |
8319 | ||
;; Full-vector vrndscale over the VF_AVX512VL modes: UNSPEC_ROUND of the
;; source operand 1 and the immediate operand 2 (const_0_to_255_operand).
;; The immediate is accounted for by "length_immediate" = 1.
250533c0 | 8320 | (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>" |
8321 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
8322 | (unspec:VF_AVX512VL | |
8323 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>") | |
85065932 | 8324 | (match_operand:SI 2 "const_0_to_255_operand")] |
8325 | UNSPEC_ROUND))] | |
6615b722 | 8326 | "TARGET_AVX512F" |
dbfe84d5 | 8327 | "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}" |
6615b722 | 8328 | [(set_attr "length_immediate" "1") |
8329 | (set_attr "prefix" "evex") | |
8330 | (set_attr "mode" "<MODE>")]) | |
8331 | ||
;; Scalar vrndscale over VF_128: UNSPEC_ROUND of operands 1, 2 and the
;; immediate operand 3, vec_merge'd with operand 1 under (const_int 1), so
;; only element 0 comes from the unspec and the rest from operand 1.
0b7cc9c6 | 8332 | (define_insn "avx512f_rndscale<mode><round_saeonly_name>" |
85065932 | 8333 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
8334 | (vec_merge:VF_128 | |
8335 | (unspec:VF_128 | |
8336 | [(match_operand:VF_128 1 "register_operand" "v") | |
fbf4df62 | 8337 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
85065932 | 8338 | (match_operand:SI 3 "const_0_to_255_operand")] |
8339 | UNSPEC_ROUND) | |
8340 | (match_dup 1) | |
8341 | (const_int 1)))] | |
8342 | "TARGET_AVX512F" | |
0b7cc9c6 | 8343 | "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}" |
85065932 | 8344 | [(set_attr "length_immediate" "1") |
8345 | (set_attr "prefix" "evex") | |
8346 | (set_attr "mode" "<MODE>")]) | |
8347 | ||
697a43f8 | 8348 | ;; One bit in mask selects 2 elements. |
;; 512-bit vshufps expressed as a vec_select of 16 elements out of the
;; 32-element concatenation of operands 1 and 2.  The insn condition
;; requires selectors 7-18 to repeat selectors 3-6 shifted by 4, 8 and 12,
;; i.e. the same choice in every group of four.  The output code packs
;; selectors 3-6 into the 8-bit immediate, two bits each (selectors 5 and 6
;; are first rebased by 16), and overwrites operands[3] with it.
5220cab6 | 8349 | (define_insn "avx512f_shufps512_1<mask_name>" |
697a43f8 | 8350 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
8351 | (vec_select:V16SF | |
8352 | (vec_concat:V32SF | |
8353 | (match_operand:V16SF 1 "register_operand" "v") | |
8354 | (match_operand:V16SF 2 "nonimmediate_operand" "vm")) | |
8355 | (parallel [(match_operand 3 "const_0_to_3_operand") | |
8356 | (match_operand 4 "const_0_to_3_operand") | |
8357 | (match_operand 5 "const_16_to_19_operand") | |
8358 | (match_operand 6 "const_16_to_19_operand") | |
8359 | (match_operand 7 "const_4_to_7_operand") | |
8360 | (match_operand 8 "const_4_to_7_operand") | |
8361 | (match_operand 9 "const_20_to_23_operand") | |
8362 | (match_operand 10 "const_20_to_23_operand") | |
8363 | (match_operand 11 "const_8_to_11_operand") | |
8364 | (match_operand 12 "const_8_to_11_operand") | |
8365 | (match_operand 13 "const_24_to_27_operand") | |
8366 | (match_operand 14 "const_24_to_27_operand") | |
8367 | (match_operand 15 "const_12_to_15_operand") | |
8368 | (match_operand 16 "const_12_to_15_operand") | |
8369 | (match_operand 17 "const_28_to_31_operand") | |
8370 | (match_operand 18 "const_28_to_31_operand")])))] | |
8371 | "TARGET_AVX512F | |
8372 | && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4) | |
8373 | && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4) | |
8374 | && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4) | |
8375 | && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4) | |
8376 | && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8) | |
8377 | && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8) | |
8378 | && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8) | |
8379 | && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8) | |
8380 | && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12) | |
8381 | && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12) | |
8382 | && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12) | |
8383 | && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))" | |
8384 | { | |
8385 | int mask; | |
8386 | mask = INTVAL (operands[3]); | |
8387 | mask |= INTVAL (operands[4]) << 2; | |
8388 | mask |= (INTVAL (operands[5]) - 16) << 4; | |
8389 | mask |= (INTVAL (operands[6]) - 16) << 6; | |
8390 | operands[3] = GEN_INT (mask); | |
8391 | | |
5220cab6 | 8392 | return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}"; |
697a43f8 | 8393 | } |
8394 | [(set_attr "type" "sselog") | |
8395 | (set_attr "length_immediate" "1") | |
8396 | (set_attr "prefix" "evex") | |
8397 | (set_attr "mode" "V16SF")]) | |
8398 | ||
;; Masked 512-bit shufpd expander.  Decodes the 8-bit immediate in
;; operand 3 into the eight explicit element selectors expected by
;; avx512f_shufpd512_1_mask: bit i of the immediate adds 1 to the base index
;; of selector i, where the bases are 0, 8, 2, 10, 4, 12, 6, 14 (alternating
;; between operand 1 and operand 2 of the concatenation).  Operands 4 and 5
;; are passed through unchanged as the last two arguments.
5220cab6 | 8399 | (define_expand "avx512f_shufpd512_mask" |
8400 | [(match_operand:V8DF 0 "register_operand") | |
8401 | (match_operand:V8DF 1 "register_operand") | |
8402 | (match_operand:V8DF 2 "nonimmediate_operand") | |
8403 | (match_operand:SI 3 "const_0_to_255_operand") | |
8404 | (match_operand:V8DF 4 "register_operand") | |
8405 | (match_operand:QI 5 "register_operand")] | |
8406 | "TARGET_AVX512F" | |
8407 | { | |
8408 | int mask = INTVAL (operands[3]); | |
8409 | emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2], | |
8410 | GEN_INT (mask & 1), | |
8411 | GEN_INT (mask & 2 ? 9 : 8), | |
8412 | GEN_INT (mask & 4 ? 3 : 2), | |
8413 | GEN_INT (mask & 8 ? 11 : 10), | |
8414 | GEN_INT (mask & 16 ? 5 : 4), | |
8415 | GEN_INT (mask & 32 ? 13 : 12), | |
8416 | GEN_INT (mask & 64 ? 7 : 6), | |
8417 | GEN_INT (mask & 128 ? 15 : 14), | |
8418 | operands[4], operands[5])); | |
8419 | DONE; | |
8420 | }) | |
8421 | ||
;; 512-bit vshufpd as a vec_select of 8 elements out of the 16-element
;; concatenation of operands 1 and 2.  Each selector (operands 3-10) may
;; only take one of two adjacent indices, as enforced by its predicate.
;; The output code turns every selector back into one bit (selector minus
;; its base index, shifted to its position) and stores the resulting 8-bit
;; immediate in operands[3] before the template is printed.
8422 | (define_insn "avx512f_shufpd512_1<mask_name>" | |
697a43f8 | 8423 | [(set (match_operand:V8DF 0 "register_operand" "=v") |
8424 | (vec_select:V8DF | |
8425 | (vec_concat:V16DF | |
8426 | (match_operand:V8DF 1 "register_operand" "v") | |
8427 | (match_operand:V8DF 2 "nonimmediate_operand" "vm")) | |
8428 | (parallel [(match_operand 3 "const_0_to_1_operand") | |
8429 | (match_operand 4 "const_8_to_9_operand") | |
8430 | (match_operand 5 "const_2_to_3_operand") | |
8431 | (match_operand 6 "const_10_to_11_operand") | |
8432 | (match_operand 7 "const_4_to_5_operand") | |
8433 | (match_operand 8 "const_12_to_13_operand") | |
8434 | (match_operand 9 "const_6_to_7_operand") | |
8435 | (match_operand 10 "const_14_to_15_operand")])))] | |
8436 | "TARGET_AVX512F" | |
8437 | { | |
8438 | int mask; | |
8439 | mask = INTVAL (operands[3]); | |
8440 | mask |= (INTVAL (operands[4]) - 8) << 1; | |
8441 | mask |= (INTVAL (operands[5]) - 2) << 2; | |
8442 | mask |= (INTVAL (operands[6]) - 10) << 3; | |
8443 | mask |= (INTVAL (operands[7]) - 4) << 4; | |
8444 | mask |= (INTVAL (operands[8]) - 12) << 5; | |
8445 | mask |= (INTVAL (operands[9]) - 6) << 6; | |
8446 | mask |= (INTVAL (operands[10]) - 14) << 7; | |
8447 | operands[3] = GEN_INT (mask); | |
8448 | | |
5220cab6 | 8449 | return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; |
697a43f8 | 8450 | } |
8451 | [(set_attr "type" "sselog") | |
8452 | (set_attr "length_immediate" "1") | |
8453 | (set_attr "prefix" "evex") | |
8454 | (set_attr "mode" "V8DF")]) | |
8455 | ||
;; 256-bit shufpd expander.  Decodes the low four bits of the immediate in
;; operand 3 into the four element selectors of avx_shufpd256_1: bit i adds
;; 1 to the base index of selector i, the bases being 0, 4, 2 and 6.
;; <mask_expand4_args> appends any extra arguments of the masked variant.
a17ccedb | 8456 | (define_expand "avx_shufpd256<mask_expand4_name>" |
abd4f58b | 8457 | [(match_operand:V4DF 0 "register_operand") |
8458 | (match_operand:V4DF 1 "register_operand") | |
8459 | (match_operand:V4DF 2 "nonimmediate_operand") | |
8460 | (match_operand:SI 3 "const_int_operand")] | |
ed30e0a6 | 8461 | "TARGET_AVX" |
8462 | { | |
8463 | int mask = INTVAL (operands[3]); | |
a17ccedb | 8464 | emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0], |
8465 | operands[1], | |
8466 | operands[2], | |
8467 | GEN_INT (mask & 1), | |
8468 | GEN_INT (mask & 2 ? 5 : 4), | |
8469 | GEN_INT (mask & 4 ? 3 : 2), | |
8470 | GEN_INT (mask & 8 ? 7 : 6) | |
8471 | <mask_expand4_args>)); | |
ed30e0a6 | 8472 | DONE; |
8473 | }) | |
8474 | ||
;; 256-bit vshufpd as a vec_select of 4 elements out of the 8-element
;; concatenation of operands 1 and 2.  The output code rebuilds the 4-bit
;; immediate from selectors 3-6 (each selector minus its base index 0, 4, 2
;; or 6, shifted to its bit position) and stores it in operands[3].
a17ccedb | 8475 | (define_insn "avx_shufpd256_1<mask_name>" |
8476 | [(set (match_operand:V4DF 0 "register_operand" "=v") | |
ed30e0a6 | 8477 | (vec_select:V4DF |
8478 | (vec_concat:V8DF | |
a17ccedb | 8479 | (match_operand:V4DF 1 "register_operand" "v") |
8480 | (match_operand:V4DF 2 "nonimmediate_operand" "vm")) | |
abd4f58b | 8481 | (parallel [(match_operand 3 "const_0_to_1_operand") |
8482 | (match_operand 4 "const_4_to_5_operand") | |
8483 | (match_operand 5 "const_2_to_3_operand") | |
8484 | (match_operand 6 "const_6_to_7_operand")])))] | |
a17ccedb | 8485 | "TARGET_AVX && <mask_avx512vl_condition>" |
ed30e0a6 | 8486 | { |
8487 | int mask; | |
8488 | mask = INTVAL (operands[3]); | |
8489 | mask |= (INTVAL (operands[4]) - 4) << 1; | |
8490 | mask |= (INTVAL (operands[5]) - 2) << 2; | |
8491 | mask |= (INTVAL (operands[6]) - 6) << 3; | |
8492 | operands[3] = GEN_INT (mask); | |
8493 | | |
a17ccedb | 8494 | return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}"; |
ed30e0a6 | 8495 | } |
77aff08f | 8496 | [(set_attr "type" "sseshuf") |
00a0e418 | 8497 | (set_attr "length_immediate" "1") |
ed30e0a6 | 8498 | (set_attr "prefix" "vex") |
8499 | (set_attr "mode" "V4DF")]) | |
8500 | ||
;; 128-bit shufpd expander.  Decodes the low two bits of the immediate in
;; operand 3 into the two element selectors of sse2_shufpd_v2df: bit 0
;; selects index 0 or 1, bit 1 selects index 2 or 3.
;; <mask_expand4_args> appends any extra arguments of the masked variant.
a17ccedb | 8501 | (define_expand "sse2_shufpd<mask_expand4_name>" |
abd4f58b | 8502 | [(match_operand:V2DF 0 "register_operand") |
8503 | (match_operand:V2DF 1 "register_operand") | |
8504 | (match_operand:V2DF 2 "nonimmediate_operand") | |
8505 | (match_operand:SI 3 "const_int_operand")] | |
5802c0cb | 8506 | "TARGET_SSE2" |
8507 | { | |
8508 | int mask = INTVAL (operands[3]); | |
a17ccedb | 8509 | emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1], |
8510 | operands[2], GEN_INT (mask & 1), | |
8511 | GEN_INT (mask & 2 ? 3 : 2) | |
8512 | <mask_expand4_args>)); | |
5802c0cb | 8513 | DONE; |
8514 | }) | |
8515 | ||
;; Masked 128-bit vshufpd (AVX512VL).  Selector 3 (index 0 or 1) and
;; selector 4 (index 2 or 3) each pick one element of the 4-element
;; concatenation of operands 1 and 2; the selected pair is vec_merge'd with
;; operand 5 under the mask register operand 6.  The output code folds the
;; two selectors into a 2-bit immediate stored in operands[3].
;; The mask must be printed as %{%6%} in BOTH dialect halves of the
;; template; "%{6%}" would emit the literal text "{6}" instead of the mask
;; register when Intel syntax is selected.
a17ccedb | 8516 | (define_insn "sse2_shufpd_v2df_mask" |
8517 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
8518 | (vec_merge:V2DF | |
8519 | (vec_select:V2DF | |
8520 | (vec_concat:V4DF | |
8521 | (match_operand:V2DF 1 "register_operand" "v") | |
8522 | (match_operand:V2DF 2 "nonimmediate_operand" "vm")) | |
8523 | (parallel [(match_operand 3 "const_0_to_1_operand") | |
8524 | (match_operand 4 "const_2_to_3_operand")])) | |
8525 | (match_operand:V2DF 5 "vector_move_operand" "0C") | |
8526 | (match_operand:QI 6 "register_operand" "Yk")))] | |
8527 | "TARGET_AVX512VL" | |
8528 | { | |
8529 | int mask; | |
8530 | mask = INTVAL (operands[3]); | |
8531 | mask |= (INTVAL (operands[4]) - 2) << 1; | |
8532 | operands[3] = GEN_INT (mask); | |
8533 | | |
8534 | return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}"; | |
8535 | } | |
8536 | [(set_attr "type" "sseshuf") | |
8537 | (set_attr "length_immediate" "1") | |
8538 | (set_attr "prefix" "evex") | |
8539 | (set_attr "mode" "V2DF")]) | |
8540 | ||
a6142438 | 8541 | ;; punpcklqdq and punpckhqdq are shorter than shufpd. |
;; 256-bit vpunpckhqdq: selects elements 1, 5, 3, 7 of the 8-element
;; concatenation of operands 1 and 2, i.e. the odd-indexed element of each
;; pair, alternating between operand 1 and operand 2.
d5f65ad4 | 8542 | (define_insn "avx2_interleave_highv4di<mask_name>" |
8543 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
5deb404d | 8544 | (vec_select:V4DI |
8545 | (vec_concat:V8DI | |
d5f65ad4 | 8546 | (match_operand:V4DI 1 "register_operand" "v") |
8547 | (match_operand:V4DI 2 "nonimmediate_operand" "vm")) | |
5deb404d | 8548 | (parallel [(const_int 1) |
8549 | (const_int 5) | |
8550 | (const_int 3) | |
8551 | (const_int 7)])))] | |
d5f65ad4 | 8552 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
8553 | "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 8554 | [(set_attr "type" "sselog") |
8555 | (set_attr "prefix" "vex") | |
8556 | (set_attr "mode" "OI")]) | |
ed30e0a6 | 8557 | |
;; 512-bit vpunpckhqdq: selects elements 1, 9, 3, 11, 5, 13, 7, 15 of the
;; 16-element concatenation of operands 1 and 2, i.e. the odd-indexed
;; element of each pair, alternating between operand 1 and operand 2.
5220cab6 | 8558 | (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>" |
697a43f8 | 8559 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
8560 | (vec_select:V8DI | |
8561 | (vec_concat:V16DI | |
8562 | (match_operand:V8DI 1 "register_operand" "v") | |
8563 | (match_operand:V8DI 2 "nonimmediate_operand" "vm")) | |
8564 | (parallel [(const_int 1) (const_int 9) | |
8565 | (const_int 3) (const_int 11) | |
8566 | (const_int 5) (const_int 13) | |
8567 | (const_int 7) (const_int 15)])))] | |
8568 | "TARGET_AVX512F" | |
5220cab6 | 8569 | "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 8570 | [(set_attr "type" "sselog") |
8571 | (set_attr "prefix" "evex") | |
8572 | (set_attr "mode" "XI")]) | |
8573 | ||
;; 128-bit punpckhqdq: selects elements 1 and 3 of the 4-element
;; concatenation of operands 1 and 2.  Alternative 0 (isa "noavx") is the
;; two-operand SSE2 form with operand 1 tied to the destination;
;; alternative 1 (isa "avx") is the three-operand form, whose "prefix"
;; attribute follows <mask_prefix> because it can carry a mask operand.
d5f65ad4 | 8574 | (define_insn "vec_interleave_highv2di<mask_name>" |
8575 | [(set (match_operand:V2DI 0 "register_operand" "=x,v") | |
a6142438 | 8576 | (vec_select:V2DI |
8577 | (vec_concat:V4DI | |
d5f65ad4 | 8578 | (match_operand:V2DI 1 "register_operand" "0,v") |
8579 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm")) | |
a6142438 | 8580 | (parallel [(const_int 1) |
8581 | (const_int 3)])))] | |
d5f65ad4 | 8582 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
45c0368c | 8583 | "@ |
8584 | punpckhqdq\t{%2, %0|%0, %2} | |
d5f65ad4 | 8585 | vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
45c0368c | 8586 | [(set_attr "isa" "noavx,avx") |
8587 | (set_attr "type" "sselog") | |
8588 | (set_attr "prefix_data16" "1,*") | |
d5f65ad4 | 8589 | (set_attr "prefix" "orig,<mask_prefix>") |
ed30e0a6 | 8590 | (set_attr "mode" "TI")]) |
8591 | ||
;; 256-bit vpunpcklqdq: selects elements 0, 4, 2, 6 of the 8-element
;; concatenation of operands 1 and 2, i.e. the even-indexed element of each
;; pair, alternating between operand 1 and operand 2.
d5f65ad4 | 8592 | (define_insn "avx2_interleave_lowv4di<mask_name>" |
8593 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
5deb404d | 8594 | (vec_select:V4DI |
8595 | (vec_concat:V8DI | |
d5f65ad4 | 8596 | (match_operand:V4DI 1 "register_operand" "v") |
8597 | (match_operand:V4DI 2 "nonimmediate_operand" "vm")) | |
5deb404d | 8598 | (parallel [(const_int 0) |
8599 | (const_int 4) | |
8600 | (const_int 2) | |
8601 | (const_int 6)])))] | |
d5f65ad4 | 8602 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
8603 | "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 8604 | [(set_attr "type" "sselog") |
8605 | (set_attr "prefix" "vex") | |
8606 | (set_attr "mode" "OI")]) | |
8607 | ||
;; 512-bit vpunpcklqdq: selects elements 0, 8, 2, 10, 4, 12, 6, 14 of the
;; 16-element concatenation of operands 1 and 2, i.e. the even-indexed
;; element of each pair, alternating between operand 1 and operand 2.
5220cab6 | 8608 | (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>" |
697a43f8 | 8609 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
8610 | (vec_select:V8DI | |
8611 | (vec_concat:V16DI | |
8612 | (match_operand:V8DI 1 "register_operand" "v") | |
8613 | (match_operand:V8DI 2 "nonimmediate_operand" "vm")) | |
8614 | (parallel [(const_int 0) (const_int 8) | |
8615 | (const_int 2) (const_int 10) | |
8616 | (const_int 4) (const_int 12) | |
8617 | (const_int 6) (const_int 14)])))] | |
8618 | "TARGET_AVX512F" | |
5220cab6 | 8619 | "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 8620 | [(set_attr "type" "sselog") |
8621 | (set_attr "prefix" "evex") | |
8622 | (set_attr "mode" "XI")]) | |
8623 | ||
;; 128-bit punpcklqdq: selects elements 0 and 2 of the 4-element
;; concatenation of operands 1 and 2.  Alternative 0 (isa "noavx") is the
;; two-operand SSE2 form with operand 1 tied to the destination;
;; alternative 1 (isa "avx") is the three-operand form.  Because the pattern
;; is instantiated through <mask_name>, the second alternative's "prefix"
;; attribute uses <mask_prefix> (as vec_interleave_highv2di<mask_name>
;; does) rather than a hard-coded "vex", so the masked variant is not
;; described as VEX-encoded.
d5f65ad4 | 8624 | (define_insn "vec_interleave_lowv2di<mask_name>" |
8625 | [(set (match_operand:V2DI 0 "register_operand" "=x,v") | |
a6142438 | 8626 | (vec_select:V2DI |
8627 | (vec_concat:V4DI | |
d5f65ad4 | 8628 | (match_operand:V2DI 1 "register_operand" "0,v") |
8629 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm")) | |
a6142438 | 8630 | (parallel [(const_int 0) |
8631 | (const_int 2)])))] | |
d5f65ad4 | 8632 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
45c0368c | 8633 | "@ |
8634 | punpcklqdq\t{%2, %0|%0, %2} | |
d5f65ad4 | 8635 | vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
45c0368c | 8636 | [(set_attr "isa" "noavx,avx") |
8637 | (set_attr "type" "sselog") | |
8638 | (set_attr "prefix_data16" "1,*") | |
8639 | (set_attr "prefix" "orig,<mask_prefix>") | |
a6142438 | 8640 | (set_attr "mode" "TI")]) |
8641 | ||
;; Unmasked 128-bit shufpd over the VI8F_128 modes.  Selector 3 (index 0 or
;; 1) and selector 4 (index 2 or 3) each pick one element of the
;; concatenation of operands 1 and 2.  The output code folds them into a
;; 2-bit immediate stored in operands[3], then emits the two-operand shufpd
;; for alternative 0 (isa "noavx") or the three-operand vshufpd for
;; alternative 1 (isa "avx").
56c7c824 | 8642 | (define_insn "sse2_shufpd_<mode>" |
6fe5844b | 8643 | [(set (match_operand:VI8F_128 0 "register_operand" "=x,x") |
8644 | (vec_select:VI8F_128 | |
63d5e521 | 8645 | (vec_concat:<ssedoublevecmode> |
6fe5844b | 8646 | (match_operand:VI8F_128 1 "register_operand" "0,x") |
8647 | (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm")) | |
abd4f58b | 8648 | (parallel [(match_operand 3 "const_0_to_1_operand") |
8649 | (match_operand 4 "const_2_to_3_operand")])))] | |
5802c0cb | 8650 | "TARGET_SSE2" |
8651 | { | |
8652 | int mask; | |
8653 | mask = INTVAL (operands[3]); | |
8654 | mask |= (INTVAL (operands[4]) - 2) << 1; | |
8655 | operands[3] = GEN_INT (mask); | |
8656 | | |
45c0368c | 8657 | switch (which_alternative) |
8658 | { | |
8659 | case 0: | |
8660 | return "shufpd\t{%3, %2, %0|%0, %2, %3}"; | |
8661 | case 1: | |
8662 | return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | |
8663 | default: | |
8664 | gcc_unreachable (); | |
8665 | } | |
5802c0cb | 8666 | } |
45c0368c | 8667 | [(set_attr "isa" "noavx,avx") |
77aff08f | 8668 | (set_attr "type" "sseshuf") |
00a0e418 | 8669 | (set_attr "length_immediate" "1") |
45c0368c | 8670 | (set_attr "prefix" "orig,vex") |
5802c0cb | 8671 | (set_attr "mode" "V2DF")]) |
8672 | ||
6be36710 | 8673 | ;; Avoid combining registers from different units in a single alternative, |
8674 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
;; Extracts element 1 (the high half) of a V2DF into a DF destination.
;; Alternatives 0-2 emit movhpd / unpckhpd / vunpckhpd directly.
;; Alternatives 3-5 take an offsettable memory source (constraint "o") and
;; use the "#" template, i.e. they rely on being split into a plain move.
;; prefix_data16 is 1 only for alternative 0 when TARGET_AVX is off.
5802c0cb | 8675 | (define_insn "sse2_storehpd" |
45c0368c | 8676 | [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r") |
5802c0cb | 8677 | (vec_select:DF |
45c0368c | 8678 | (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o") |
5802c0cb | 8679 | (parallel [(const_int 1)])))] |
8680 | "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8681 | "@ | |
45c0368c | 8682 | %vmovhpd\t{%1, %0|%0, %1} |
5802c0cb | 8683 | unpckhpd\t%0, %0 |
45c0368c | 8684 | vunpckhpd\t{%d1, %0|%0, %d1} |
6be36710 | 8685 | # |
8686 | # | |
5802c0cb | 8687 | #" |
d1c8b778 | 8688 | [(set_attr "isa" "*,noavx,avx,*,*,*") |
45c0368c | 8689 | (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov") |
8690 | (set (attr "prefix_data16") | |
8691 | (if_then_else | |
8692 | (and (eq_attr "alternative" "0") | |
6be3efec | 8693 | (not (match_test "TARGET_AVX"))) |
45c0368c | 8694 | (const_string "1") |
8695 | (const_string "*"))) | |
8696 | (set_attr "prefix" "maybe_vex,orig,vex,*,*,*") | |
8697 | (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")]) | |
5802c0cb | 8698 | |
;; After reload, extracting element 1 of a V2DF memory operand into a DF
;; register becomes a plain DF move: the memory operand is re-addressed as
;; DFmode at byte offset 8 with adjust_address.
8699 | (define_split | |
abd4f58b | 8700 | [(set (match_operand:DF 0 "register_operand") |
5802c0cb | 8701 | (vec_select:DF |
abd4f58b | 8702 | (match_operand:V2DF 1 "memory_operand") |
5802c0cb | 8703 | (parallel [(const_int 1)])))] |
8704 | "TARGET_SSE2 && reload_completed" | |
8705 | [(set (match_dup 0) (match_dup 1))] | |
5bd1ff1d | 8706 | "operands[1] = adjust_address (operands[1], DFmode, 8);") |
5802c0cb | 8707 | |
;; Element-1 extraction from V2DF for targets with SSE but without SSE2
;; (see the insn condition), using the single-precision move instructions
;; movhps / movhlps / movlps.  The memory-source alternative reads the
;; operand through the %H modifier.
d1c8b778 | 8708 | (define_insn "*vec_extractv2df_1_sse" |
8709 | [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") | |
8710 | (vec_select:DF | |
8711 | (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") | |
8712 | (parallel [(const_int 1)])))] | |
8713 | "!TARGET_SSE2 && TARGET_SSE | |
8714 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8715 | "@ | |
c358a059 | 8716 | movhps\t{%1, %0|%q0, %1} |
d1c8b778 | 8717 | movhlps\t{%1, %0|%0, %1} |
8718 | movlps\t{%H1, %0|%0, %H1}" | |
8719 | [(set_attr "type" "ssemov") | |
8c1dfa94 | 8720 | (set_attr "ssememalign" "64") |
d1c8b778 | 8721 | (set_attr "mode" "V2SF,V4SF,V2SF")]) |
8722 | ||
6be36710 | 8723 | ;; Avoid combining registers from different units in a single alternative, |
8724 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
;; Extracts element 0 (the low half) of a V2DF into a DF destination.
;; Only alternative 0 (store from an SSE register to memory) emits an
;; instruction, movlpd; the other four alternatives use the "#" template,
;; i.e. they rely on being split into a plain DF move.
5802c0cb | 8725 | (define_insn "sse2_storelpd" |
6be36710 | 8726 | [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r") |
5802c0cb | 8727 | (vec_select:DF |
6be36710 | 8728 | (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m") |
5802c0cb | 8729 | (parallel [(const_int 0)])))] |
8730 | "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8731 | "@ | |
ed30e0a6 | 8732 | %vmovlpd\t{%1, %0|%0, %1} |
5802c0cb | 8733 | # |
6be36710 | 8734 | # |
8735 | # | |
5802c0cb | 8736 | #" |
6be36710 | 8737 | [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") |
00a0e418 | 8738 | (set_attr "prefix_data16" "1,*,*,*,*") |
ed30e0a6 | 8739 | (set_attr "prefix" "maybe_vex") |
6be36710 | 8740 | (set_attr "mode" "V1DF,DF,DF,DF,DF")]) |
5802c0cb | 8741 | |
;; After reload, extracting element 0 of a V2DF register or memory operand
;; into a DF register becomes a plain DF move.  A register source is
;; replaced by a DFmode REG with the same register number; a memory source
;; is re-addressed as DFmode at byte offset 0 with adjust_address.
8742 | (define_split | |
abd4f58b | 8743 | [(set (match_operand:DF 0 "register_operand") |
5802c0cb | 8744 | (vec_select:DF |
abd4f58b | 8745 | (match_operand:V2DF 1 "nonimmediate_operand") |
5802c0cb | 8746 | (parallel [(const_int 0)])))] |
8747 | "TARGET_SSE2 && reload_completed" | |
573c5512 | 8748 | [(set (match_dup 0) (match_dup 1))] |
5802c0cb | 8749 | { |
573c5512 | 8750 | if (REG_P (operands[1])) |
8751 | operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1])); | |
ad2c46cf | 8752 | else |
573c5512 | 8753 | operands[1] = adjust_address (operands[1], DFmode, 0); |
5802c0cb | 8754 | }) |
8755 | ||
;; Element-0 extraction from V2DF for targets with SSE but without SSE2
;; (see the insn condition), using movlps for the memory alternatives and
;; movaps for the register-to-register alternative.
d1c8b778 | 8756 | (define_insn "*vec_extractv2df_0_sse" |
8757 | [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") | |
8758 | (vec_select:DF | |
8759 | (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") | |
8760 | (parallel [(const_int 0)])))] | |
8761 | "!TARGET_SSE2 && TARGET_SSE | |
8762 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8763 | "@ | |
8764 | movlps\t{%1, %0|%0, %1} | |
8765 | movaps\t{%1, %0|%0, %1} | |
c358a059 | 8766 | movlps\t{%1, %0|%0, %q1}" |
d1c8b778 | 8767 | [(set_attr "type" "ssemov") |
8768 | (set_attr "mode" "V2SF,V4SF,V2SF")]) | |
8769 | ||
;; Expander wrapper around sse2_loadhpd.  It runs the operands through
;; ix86_fixup_binary_operands, emits sse2_loadhpd into the destination that
;; helper returns, and copies that result to operands[0] when the helper
;; returned something other than operands[0].
2485795e | 8770 | (define_expand "sse2_loadhpd_exp" |
abd4f58b | 8771 | [(set (match_operand:V2DF 0 "nonimmediate_operand") |
7c839b3f | 8772 | (vec_concat:V2DF |
8773 | (vec_select:DF | |
abd4f58b | 8774 | (match_operand:V2DF 1 "nonimmediate_operand") |
7c839b3f | 8775 | (parallel [(const_int 0)])) |
abd4f58b | 8776 | (match_operand:DF 2 "nonimmediate_operand")))] |
7c839b3f | 8777 | "TARGET_SSE2" |
cc05a422 | 8778 | { |
8779 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); | |
33541f98 | 8780 | |
cc05a422 | 8781 | emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2])); |
8782 | | |
8783 | /* Fix up the destination if needed. */ | |
8784 | if (dst != operands[0]) | |
8785 | emit_move_insn (operands[0], dst); | |
8786 | | |
8787 | DONE; | |
8788 | }) | |
7c839b3f | 8789 | |
6be36710 | 8790 | ;; Avoid combining registers from different units in a single alternative, |
8791 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
;; Builds a V2DF whose element 0 is element 0 of operand 1 and whose
;; element 1 is the DF operand 2.  Alternatives 0-3 emit movhpd / vmovhpd /
;; unpcklpd / vunpcklpd.  Alternatives 4-6 have a memory destination tied to
;; operand 1 and use the "#" template, i.e. they rely on being split.
2485795e | 8792 | (define_insn "sse2_loadhpd" |
45c0368c | 8793 | [(set (match_operand:V2DF 0 "nonimmediate_operand" |
23372c6e | 8794 | "=x,x,x,x,o,o ,o") |
5802c0cb | 8795 | (vec_concat:V2DF |
8796 | (vec_select:DF | |
45c0368c | 8797 | (match_operand:V2DF 1 "nonimmediate_operand" |
23372c6e | 8798 | " 0,x,0,x,0,0 ,0") |
5802c0cb | 8799 | (parallel [(const_int 0)])) |
45c0368c | 8800 | (match_operand:DF 2 "nonimmediate_operand" |
23372c6e | 8801 | " m,m,x,x,x,*f,r")))] |
5c752e47 | 8802 | "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
5802c0cb | 8803 | "@ |
8804 | movhpd\t{%2, %0|%0, %2} | |
45c0368c | 8805 | vmovhpd\t{%2, %1, %0|%0, %1, %2} |
5802c0cb | 8806 | unpcklpd\t{%2, %0|%0, %2} |
45c0368c | 8807 | vunpcklpd\t{%2, %1, %0|%0, %1, %2} |
6be36710 | 8808 | # |
8809 | # | |
5802c0cb | 8810 | #" |
d1c8b778 | 8811 | [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") |
23372c6e | 8812 | (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") |
8c1dfa94 | 8813 | (set_attr "ssememalign" "64") |
23372c6e | 8814 | (set_attr "prefix_data16" "1,*,*,*,*,*,*") |
8815 | (set_attr "prefix" "orig,vex,orig,vex,*,*,*") | |
8816 | (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")]) | |
5802c0cb | 8817 | |
;; After reload, replacing element 1 of a V2DF in memory (element 0 is kept
;; via match_dup 0) with a DF register becomes a plain DF store: the memory
;; operand is re-addressed as DFmode at byte offset 8 with adjust_address.
8818 | (define_split | |
abd4f58b | 8819 | [(set (match_operand:V2DF 0 "memory_operand") |
5802c0cb | 8820 | (vec_concat:V2DF |
8821 | (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) | |
abd4f58b | 8822 | (match_operand:DF 1 "register_operand")))] |
5802c0cb | 8823 | "TARGET_SSE2 && reload_completed" |
8824 | [(set (match_dup 0) (match_dup 1))] | |
5bd1ff1d | 8825 | "operands[0] = adjust_address (operands[0], DFmode, 8);") |
5802c0cb | 8826 | |
;; Expander wrapper around sse2_loadlpd.  It runs the operands through
;; ix86_fixup_binary_operands, emits sse2_loadlpd into the destination that
;; helper returns, and copies that result to operands[0] when the helper
;; returned something other than operands[0].
2485795e | 8827 | (define_expand "sse2_loadlpd_exp" |
abd4f58b | 8828 | [(set (match_operand:V2DF 0 "nonimmediate_operand") |
7c839b3f | 8829 | (vec_concat:V2DF |
abd4f58b | 8830 | (match_operand:DF 2 "nonimmediate_operand") |
7c839b3f | 8831 | (vec_select:DF |
abd4f58b | 8832 | (match_operand:V2DF 1 "nonimmediate_operand") |
7c839b3f | 8833 | (parallel [(const_int 1)]))))] |
8834 | "TARGET_SSE2" | |
cc05a422 | 8835 | { |
8836 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); | |
33541f98 | 8837 | |
cc05a422 | 8838 | emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2])); |
8839 | | |
8840 | /* Fix up the destination if needed. */ | |
8841 | if (dst != operands[0]) | |
8842 | emit_move_insn (operands[0], dst); | |
8843 | | |
8844 | DONE; | |
8845 | }) | |
7c839b3f | 8846 | |
6be36710 | 8847 | ;; Avoid combining registers from different units in a single alternative, |
8848 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
;; Builds a V2DF whose element 0 is the DF operand 2 and whose element 1 is
;; element 1 of operand 1.  Eleven alternatives: 0-7 emit an instruction
;; (movsd, movlpd, shufpd or movhpd and their VEX forms); 8-10 have a memory
;; destination tied to operand 1 and use the "#" template, i.e. they rely on
;; being split.  Alternative 0 accepts the constant "C" for operand 1.
;; The "type" attribute is sselog for alternative 5, fmov for 9, imov for
;; 10, and ssemov otherwise.
2485795e | 8849 | (define_insn "sse2_loadlpd" |
45c0368c | 8850 | [(set (match_operand:V2DF 0 "nonimmediate_operand" |
8851 | "=x,x,x,x,x,x,x,x,m,m ,m") | |
5802c0cb | 8852 | (vec_concat:V2DF |
45c0368c | 8853 | (match_operand:DF 2 "nonimmediate_operand" |
8854 | " m,m,m,x,x,0,0,x,x,*f,r") | |
5802c0cb | 8855 | (vec_select:DF |
45c0368c | 8856 | (match_operand:V2DF 1 "vector_move_operand" |
8857 | " C,0,x,0,x,x,o,o,0,0 ,0") | |
5802c0cb | 8858 | (parallel [(const_int 1)]))))] |
8859 | "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | |
8860 | "@ | |
45c0368c | 8861 | %vmovsd\t{%2, %0|%0, %2} |
5802c0cb | 8862 | movlpd\t{%2, %0|%0, %2} |
45c0368c | 8863 | vmovlpd\t{%2, %1, %0|%0, %1, %2} |
5802c0cb | 8864 | movsd\t{%2, %0|%0, %2} |
45c0368c | 8865 | vmovsd\t{%2, %1, %0|%0, %1, %2} |
ff6a33df | 8866 | shufpd\t{$2, %1, %0|%0, %1, 2} |
5802c0cb | 8867 | movhpd\t{%H1, %0|%0, %H1} |
45c0368c | 8868 | vmovhpd\t{%H1, %2, %0|%0, %2, %H1} |
6be36710 | 8869 | # |
8870 | # | |
5802c0cb | 8871 | #" |
d1c8b778 | 8872 | [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*") |
8873 | (set (attr "type") | |
8874 | (cond [(eq_attr "alternative" "5") | |
8875 | (const_string "sselog") | |
8876 | (eq_attr "alternative" "9") | |
8877 | (const_string "fmov") | |
8878 | (eq_attr "alternative" "10") | |
8879 | (const_string "imov") | |
8880 | ] | |
8881 | (const_string "ssemov"))) | |
8c1dfa94 | 8882 | (set_attr "ssememalign" "64") |
45c0368c | 8883 | (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*") |
8884 | (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*") | |
8885 | (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*") | |
8886 | (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")]) | |
5802c0cb | 8887 | |
;; After reload, replacing element 0 of a V2DF in memory (element 1 is kept
;; via match_dup 0) with a DF register becomes a plain DF store: the memory
;; operand is re-addressed as DFmode at byte offset 0 with adjust_address.
8888 | (define_split | |
abd4f58b | 8889 | [(set (match_operand:V2DF 0 "memory_operand") |
5802c0cb | 8890 | (vec_concat:V2DF |
abd4f58b | 8891 | (match_operand:DF 1 "register_operand") |
5802c0cb | 8892 | (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))] |
8893 | "TARGET_SSE2 && reload_completed" | |
8894 | [(set (match_dup 0) (match_dup 1))] | |
9af8c7c5 | 8895 | "operands[0] = adjust_address (operands[0], DFmode, 0);") |
5802c0cb | 8896 | |
;; vec_merge of operand 2 and operand 1 under (const_int 1): element 0 of
;; the result comes from operand 2, the remaining element from operand 1.
;; Nine alternatives cover register and memory combinations using movsd,
;; movlpd, shufpd and movhps in their legacy and VEX forms.  The "type"
;; attribute is sselog for alternative 5 (shufpd) and ssemov otherwise;
;; prefix_data16 is 1 for alternatives 2 and 4 when TARGET_AVX is off.
5802c0cb | 8897 | (define_insn "sse2_movsd" |
45c0368c | 8898 | [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o") |
5802c0cb | 8899 | (vec_merge:V2DF |
45c0368c | 8900 | (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0") |
8901 | (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x") | |
5802c0cb | 8902 | (const_int 1)))] |
8903 | "TARGET_SSE2" | |
8904 | "@ | |
8905 | movsd\t{%2, %0|%0, %2} | |
45c0368c | 8906 | vmovsd\t{%2, %1, %0|%0, %1, %2} |
c358a059 | 8907 | movlpd\t{%2, %0|%0, %q2} |
8908 | vmovlpd\t{%2, %1, %0|%0, %1, %q2} | |
8909 | %vmovlpd\t{%2, %0|%q0, %2} | |
ff6a33df | 8910 | shufpd\t{$2, %1, %0|%0, %1, 2} |
b2266391 | 8911 | movhps\t{%H1, %0|%0, %H1} |
45c0368c | 8912 | vmovhps\t{%H1, %2, %0|%0, %2, %H1} |
8913 | %vmovhps\t{%1, %H0|%H0, %1}" | |
d1c8b778 | 8914 | [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*") |
8915 | (set (attr "type") | |
8916 | (if_then_else | |
8917 | (eq_attr "alternative" "5") | |
8918 | (const_string "sselog") | |
8919 | (const_string "ssemov"))) | |
45c0368c | 8920 | (set (attr "prefix_data16") |
8921 | (if_then_else | |
8922 | (and (eq_attr "alternative" "2,4") | |
6be3efec | 8923 | (not (match_test "TARGET_AVX"))) |
45c0368c | 8924 | (const_string "1") |
8925 | (const_string "*"))) | |
8926 | (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*") | |
8c1dfa94 | 8927 | (set_attr "ssememalign" "64") |
45c0368c | 8928 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex") |
8929 | (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) | |
5802c0cb | 8930 | |
;; Duplicates a DF value into both elements of a V2DF.  Alternative 0
;; (isa "noavx") requires the source tied to the destination and uses
;; unpcklpd on the register itself; alternatives 1 (isa "sse3") and 2
;; (isa "avx512vl") use movddup / vmovddup from a register or memory.
adea432f | 8931 | (define_insn "vec_dupv2df<mask_name>" |
b2eda4e9 | 8932 | [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") |
a17124a0 | 8933 | (vec_duplicate:V2DF |
b2eda4e9 | 8934 | (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))] |
adea432f | 8935 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
eea5ff47 | 8936 | "@ |
8937 | unpcklpd\t%0, %0 | |
b2eda4e9 | 8938 | %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1} |
8939 | vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
8940 | [(set_attr "isa" "noavx,sse3,avx512vl") | |
eea5ff47 | 8941 | (set_attr "type" "sselog1") |
b2eda4e9 | 8942 | (set_attr "prefix" "orig,maybe_vex,evex") |
8943 | (set_attr "mode" "V2DF,DF,DF")]) | |
ad2c46cf | 8944 | |
;; Concatenates two DF values (operand 1 low, operand 2 high) into a V2DF.
;; Both operands may be memory only when TARGET_SSE3 is set and they are
;; rtx_equal_p; alternatives 3 and 4 cover that case (operand 2 uses the
;; matching constraint "1") with movddup.  Alternative 7 takes the constant
;; "C" as operand 2 and uses movsd.  The last two alternatives (isa "noavx")
;; use the single-precision movlhps / movhps.
;; The "type" attribute is sselog for alternatives 0-4, ssemov otherwise.
ad2c46cf | 8945 | (define_insn "*vec_concatv2df" |
b2eda4e9 | 8946 | [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x") |
ad2c46cf | 8947 | (vec_concat:V2DF |
b2eda4e9 | 8948 | (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0") |
8949 | (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m,C,x,m")))] | |
0a281fd0 | 8950 | "TARGET_SSE |
8951 | && (!(MEM_P (operands[1]) && MEM_P (operands[2])) | |
8952 | || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))" | |
ad2c46cf | 8953 | "@ |
8954 | unpcklpd\t{%2, %0|%0, %2} | |
45c0368c | 8955 | vunpcklpd\t{%2, %1, %0|%0, %1, %2} |
b2eda4e9 | 8956 | vunpcklpd\t{%2, %1, %0|%0, %1, %2} |
eea5ff47 | 8957 | %vmovddup\t{%1, %0|%0, %1} |
b2eda4e9 | 8958 | vmovddup\t{%1, %0|%0, %1} |
ad2c46cf | 8959 | movhpd\t{%2, %0|%0, %2} |
45c0368c | 8960 | vmovhpd\t{%2, %1, %0|%0, %1, %2} |
8961 | %vmovsd\t{%1, %0|%0, %1} | |
ad2c46cf | 8962 | movlhps\t{%2, %0|%0, %2} |
8963 | movhps\t{%2, %0|%0, %2}" | |
b2eda4e9 | 8964 | [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx") |
d1c8b778 | 8965 | (set (attr "type") |
8966 | (if_then_else | |
7bb3b827 | 8967 | (eq_attr "alternative" "0,1,2,3,4") |
d1c8b778 | 8968 | (const_string "sselog") |
8969 | (const_string "ssemov"))) | |
b2eda4e9 | 8970 | (set (attr "prefix_data16") |
8971 | (if_then_else (eq_attr "alternative" "5") | |
8972 | (const_string "1") | |
8973 | (const_string "*"))) | |
8974 | (set_attr "prefix" "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig") | |
8975 | (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")]) | |
ad2c46cf | 8976 | |
697a43f8 | 8977 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
8978 | ;; | |
8979 | ;; Parallel integer down-conversion operations | |
8980 | ;; | |
8981 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
8982 | ||
;; Destination modes of the truncations from 512-bit sources, together with
;; three per-mode attributes: the source vector mode (upper case for use as
;; a mode, lower case for use in pattern names) and the two-letter size
;; suffix appended to the vpmov mnemonic.
da982d5c | 8983 | (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI]) |
697a43f8 | 8984 | (define_mode_attr pmov_src_mode |
8985 | [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")]) | |
8986 | (define_mode_attr pmov_src_lower | |
8987 | [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")]) | |
da982d5c | 8988 | (define_mode_attr pmov_suff_1 |
697a43f8 | 8989 | [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")]) |
8990 | ||
;; Unmasked truncation of a <pmov_src_mode> register into a PMOV_DST_MODE_1
;; register (alternative 0) or memory location (alternative 1, memory
;; attribute "store").  One insn is generated per code of the any_truncate
;; iterator; <trunsuffix> (defined outside this part of the file) picks the
;; mnemonic variant.
8991 | (define_insn "*avx512f_<code><pmov_src_lower><mode>2" | |
da982d5c | 8992 | [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") |
8993 | (any_truncate:PMOV_DST_MODE_1 | |
697a43f8 | 8994 | (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))] |
8995 | "TARGET_AVX512F" | |
da982d5c | 8996 | "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}" |
697a43f8 | 8997 | [(set_attr "type" "ssemov") |
8998 | (set_attr "memory" "none,store") | |
8999 | (set_attr "prefix" "evex") | |
9000 | (set_attr "mode" "<sseinsnmode>")]) | |
9001 | ||
;; Masked form of the truncation above: a vec_merge, controlled by mask
;; register operand 3 ("Yk"), of the truncated source and operand 2.
;; With a register destination operand 2 may be the destination itself or a
;; "C" constant; with a memory destination it must be the destination ("0").
;; The template prints the mask as {%3} followed by the %N2 modifier.
5220cab6 | 9002 | (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask" |
da982d5c | 9003 | [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") |
9004 | (vec_merge:PMOV_DST_MODE_1 | |
9005 | (any_truncate:PMOV_DST_MODE_1 | |
5220cab6 | 9006 | (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")) |
da982d5c | 9007 | (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0") |
a31e7f46 | 9008 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] |
5220cab6 | 9009 | "TARGET_AVX512F" |
da982d5c | 9010 | "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" |
5220cab6 | 9011 | [(set_attr "type" "ssemov") |
9012 | (set_attr "memory" "none,store") | |
9013 | (set_attr "prefix" "evex") | |
9014 | (set_attr "mode" "<sseinsnmode>")]) | |
9015 | ||
;; Named expander for the masked truncating store.  Operand 0 must be a
;; memory operand and is also used, via match_dup, as the merge source;
;; operand 2 is the mask.  There is no preparation code: the expander only
;; emits RTL of the same shape as the memory alternative of the _mask insn.
f4a19f2a | 9016 | (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store" |
da982d5c | 9017 | [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand") |
9018 | (vec_merge:PMOV_DST_MODE_1 | |
9019 | (any_truncate:PMOV_DST_MODE_1 | |
f4a19f2a | 9020 | (match_operand:<pmov_src_mode> 1 "register_operand")) |
9021 | (match_dup 0) | |
9022 | (match_operand:<avx512fmaskmode> 2 "register_operand")))] | |
9023 | "TARGET_AVX512F") | |
9024 | ||
;; Unmasked V32HI -> V32QI truncation (vpmov<trunsuffix>wb), enabled by
;; TARGET_AVX512BW.  The destination is a register (alternative 0) or a
;; memory location (alternative 1).
8f83f53e | 9025 | (define_insn "avx512bw_<code>v32hiv32qi2" |
da982d5c | 9026 | [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") |
9027 | (any_truncate:V32QI | |
9028 | (match_operand:V32HI 1 "register_operand" "v,v")))] | |
9029 | "TARGET_AVX512BW" | |
9030 | "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}" | |
9031 | [(set_attr "type" "ssemov") | |
9032 | (set_attr "memory" "none,store") | |
9033 | (set_attr "prefix" "evex") | |
9034 | (set_attr "mode" "XI")]) | |
9035 | ||
;; Masked V32HI -> V32QI truncation.  The result is a vec_merge of the
;; truncated source and operand 2 under the SImode mask register operand 3.
;; Operand 2 is "0C" for the register destination and "0" for the memory
;; destination.
9036 | (define_insn "avx512bw_<code>v32hiv32qi2_mask" | |
9037 | [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") | |
9038 | (vec_merge:V32QI | |
9039 | (any_truncate:V32QI | |
9040 | (match_operand:V32HI 1 "register_operand" "v,v")) | |
9041 | (match_operand:V32QI 2 "vector_move_operand" "0C,0") | |
9042 | (match_operand:SI 3 "register_operand" "Yk,Yk")))] | |
9043 | "TARGET_AVX512BW" | |
9044 | "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9045 | [(set_attr "type" "ssemov") | |
9046 | (set_attr "memory" "none,store") | |
9047 | (set_attr "prefix" "evex") | |
9048 | (set_attr "mode" "XI")]) | |
9049 | ||
;; Named expander for the masked V32HI -> V32QI truncation that merges into
;; its own destination (match_dup 0); operand 2 is the SImode mask.  The
;; destination predicate here is nonimmediate_operand, so a register
;; destination is accepted as well as memory.  No preparation code.
9050 | (define_expand "avx512bw_<code>v32hiv32qi2_mask_store" | |
9051 | [(set (match_operand:V32QI 0 "nonimmediate_operand") | |
9052 | (vec_merge:V32QI | |
9053 | (any_truncate:V32QI | |
9054 | (match_operand:V32HI 1 "register_operand")) | |
9055 | (match_dup 0) | |
9056 | (match_operand:SI 2 "register_operand")))] | |
9057 | "TARGET_AVX512BW") | |
9058 | ||
;; Destination modes of the truncations whose source is the corresponding
;; <ssedoublemode>; V16QI is only included when TARGET_AVX512BW holds.
;; pmov_suff_2 gives the vpmov size suffix for each destination mode.
9059 | (define_mode_iterator PMOV_DST_MODE_2 | |
9060 | [V4SI V8HI (V16QI "TARGET_AVX512BW")]) | |
9061 | (define_mode_attr pmov_suff_2 | |
9062 | [(V16QI "wb") (V8HI "dw") (V4SI "qd")]) | |
9063 | ||
;; Unmasked truncation of an <ssedoublemode> register into a
;; PMOV_DST_MODE_2 register (alternative 0) or memory location
;; (alternative 1), enabled by TARGET_AVX512VL.
9064 | (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2" | |
9065 | [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m") | |
9066 | (any_truncate:PMOV_DST_MODE_2 | |
9067 | (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))] | |
9068 | "TARGET_AVX512VL" | |
9069 | "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}" | |
9070 | [(set_attr "type" "ssemov") | |
9071 | (set_attr "memory" "none,store") | |
9072 | (set_attr "prefix" "evex") | |
9073 | (set_attr "mode" "<sseinsnmode>")]) | |
9074 | ||
;; Masked form of the <ssedoublemode> -> PMOV_DST_MODE_2 truncation: a
;; vec_merge of the truncated source and operand 2 under mask register
;; operand 3.  Operand 2 is "0C" for the register destination and "0" for
;; the memory destination.
9075 | (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask" | |
9076 | [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m") | |
9077 | (vec_merge:PMOV_DST_MODE_2 | |
9078 | (any_truncate:PMOV_DST_MODE_2 | |
9079 | (match_operand:<ssedoublemode> 1 "register_operand" "v,v")) | |
9080 | (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0") | |
9081 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] | |
9082 | "TARGET_AVX512VL" | |
9083 | "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9084 | [(set_attr "type" "ssemov") | |
9085 | (set_attr "memory" "none,store") | |
9086 | (set_attr "prefix" "evex") | |
9087 | (set_attr "mode" "<sseinsnmode>")]) | |
9088 | ||
;; Named expander for the masked truncation that merges into its own
;; destination (match_dup 0); operand 2 is the mask.  The destination
;; predicate is nonimmediate_operand.  No preparation code.
9089 | (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store" | |
9090 | [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand") | |
9091 | (vec_merge:PMOV_DST_MODE_2 | |
9092 | (any_truncate:PMOV_DST_MODE_2 | |
9093 | (match_operand:<ssedoublemode> 1 "register_operand")) | |
9094 | (match_dup 0) | |
9095 | (match_operand:<avx512fmaskmode> 2 "register_operand")))] | |
9096 | "TARGET_AVX512VL") | |
9097 | ||
;; Source modes of the truncations to QImode elements whose result occupies
;; only the low part of a V16QI register (V8HI requires TARGET_AVX512BW).
;; Per source mode: pmov_dst_3 is the mode of the truncated part,
;; pmov_dst_zeroed_3 the mode of the remaining elements of the V16QI
;; result, and pmov_suff_3 the vpmov size suffix.
9098 | (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")]) | |
9099 | (define_mode_attr pmov_dst_3 | |
9100 | [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")]) | |
9101 | (define_mode_attr pmov_dst_zeroed_3 | |
9102 | [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")]) | |
9103 | (define_mode_attr pmov_suff_3 | |
9104 | [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")]) | |
9105 | ||
;; Register-to-register truncation to QImode elements.  The V16QI result is
;; the concatenation of the truncated source (<pmov_dst_3>) with operand 2,
;; which must satisfy const0_operand, i.e. the remaining elements are zero.
9106 | (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2" | |
9107 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
9108 | (vec_concat:V16QI | |
9109 | (any_truncate:<pmov_dst_3> | |
9110 | (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v")) | |
9111 | (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))] | |
9112 | "TARGET_AVX512VL" | |
9113 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}" | |
9114 | [(set_attr "type" "ssemov") | |
9115 | (set_attr "prefix" "evex") | |
9116 | (set_attr "mode" "TI")]) | |
9117 | ||
;; Store form of the V2DI -> V2QI truncation.  Operand 0 is described as a
;; V16QI memory location, but elements 2..15 of the new value are selected
;; back from the destination itself (match_dup 0), so only the low two
;; bytes change.  In the Intel dialect the memory operand is printed with
;; the %w size modifier so its size is the two bytes actually written
;; rather than the 16 bytes implied by the operand's V16QI mode.
9118 | (define_insn "*avx512vl_<code>v2div2qi2_store" | |
9119 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
9120 | (vec_concat:V16QI | |
9121 | (any_truncate:V2QI | |
9122 | (match_operand:V2DI 1 "register_operand" "v")) | |
9123 | (vec_select:V14QI | |
9124 | (match_dup 0) | |
9125 | (parallel [(const_int 2) (const_int 3) | |
9126 | (const_int 4) (const_int 5) | |
9127 | (const_int 6) (const_int 7) | |
9128 | (const_int 8) (const_int 9) | |
9129 | (const_int 10) (const_int 11) | |
9130 | (const_int 12) (const_int 13) | |
9131 | (const_int 14) (const_int 15)]))))] | |
9132 | "TARGET_AVX512VL" | |
9133 | "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}" | |
9134 | [(set_attr "type" "ssemov") | |
9135 | (set_attr "memory" "store") | |
9136 | (set_attr "prefix" "evex") | |
9137 | (set_attr "mode" "TI")]) | |
9138 | ||
;; Masked register form of the V2DI -> V2QI truncation.  The low two
;; elements of the V16QI result are a vec_merge, under the QImode mask
;; register operand 3, of the truncated source and elements 0..1 of
;; operand 2 ("0C"); the remaining fourteen elements are constant zero.
9139 | (define_insn "avx512vl_<code>v2div2qi2_mask" | |
9140 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
9141 | (vec_concat:V16QI | |
9142 | (vec_merge:V2QI | |
9143 | (any_truncate:V2QI | |
9144 | (match_operand:V2DI 1 "register_operand" "v")) | |
9145 | (vec_select:V2QI | |
9146 | (match_operand:V16QI 2 "vector_move_operand" "0C") | |
9147 | (parallel [(const_int 0) (const_int 1)])) | |
9148 | (match_operand:QI 3 "register_operand" "Yk")) | |
9149 | (const_vector:V14QI [(const_int 0) (const_int 0) | |
9150 | (const_int 0) (const_int 0) | |
9151 | (const_int 0) (const_int 0) | |
9152 | (const_int 0) (const_int 0) | |
9153 | (const_int 0) (const_int 0) | |
9154 | (const_int 0) (const_int 0) | |
9155 | (const_int 0) (const_int 0)])))] | |
9156 | "TARGET_AVX512VL" | |
9157 | "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9158 | [(set_attr "type" "ssemov") | |
9159 | (set_attr "prefix" "evex") | |
9160 | (set_attr "mode" "TI")]) | |
9161 | ||
;; Masked store form of the V2DI -> V2QI truncation.  The low two elements
;; written are a vec_merge, under mask register operand 2, of the truncated
;; source and the destination's own elements 0..1; elements 2..15 are
;; selected back from the destination (match_dup 0) and so are unchanged.
;; In the Intel dialect the memory operand is printed with the %w size
;; modifier so its size is the two bytes that can be written rather than
;; the 16 bytes implied by the operand's V16QI mode.
9162 | (define_insn "avx512vl_<code>v2div2qi2_mask_store" | |
9163 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
9164 | (vec_concat:V16QI | |
9165 | (vec_merge:V2QI | |
9166 | (any_truncate:V2QI | |
9167 | (match_operand:V2DI 1 "register_operand" "v")) | |
9168 | (vec_select:V2QI | |
9169 | (match_dup 0) | |
9170 | (parallel [(const_int 0) (const_int 1)])) | |
9171 | (match_operand:QI 2 "register_operand" "Yk")) | |
9172 | (vec_select:V14QI | |
9173 | (match_dup 0) | |
9174 | (parallel [(const_int 2) (const_int 3) | |
9175 | (const_int 4) (const_int 5) | |
9176 | (const_int 6) (const_int 7) | |
9177 | (const_int 8) (const_int 9) | |
9178 | (const_int 10) (const_int 11) | |
9179 | (const_int 12) (const_int 13) | |
9180 | (const_int 14) (const_int 15)]))))] | |
9181 | "TARGET_AVX512VL" | |
9182 | "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}" | |
9183 | [(set_attr "type" "ssemov") | |
9184 | (set_attr "memory" "store") | |
9185 | (set_attr "prefix" "evex") | |
9186 | (set_attr "mode" "TI")]) | |
9187 | ||
;; Store form of the four-element truncation to V4QI, for the source modes
;; of VI4_128_8_256.  Elements 4..15 of the V16QI memory destination are
;; selected back from the destination itself (match_dup 0), so only the low
;; four bytes change.  In the Intel dialect the memory operand is printed
;; with the %k size modifier so its size is those four bytes rather than
;; the 16 bytes implied by the operand's V16QI mode.
9188 | (define_insn "*avx512vl_<code><mode>v4qi2_store" | |
9189 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
9190 | (vec_concat:V16QI | |
9191 | (any_truncate:V4QI | |
9192 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
9193 | (vec_select:V12QI | |
9194 | (match_dup 0) | |
9195 | (parallel [(const_int 4) (const_int 5) | |
9196 | (const_int 6) (const_int 7) | |
9197 | (const_int 8) (const_int 9) | |
9198 | (const_int 10) (const_int 11) | |
9199 | (const_int 12) (const_int 13) | |
9200 | (const_int 14) (const_int 15)]))))] | |
9201 | "TARGET_AVX512VL" | |
9202 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}" | |
9203 | [(set_attr "type" "ssemov") | |
9204 | (set_attr "memory" "store") | |
9205 | (set_attr "prefix" "evex") | |
9206 | (set_attr "mode" "TI")]) | |
9207 | ||
;; Masked register form of the four-element truncation to V4QI.  The low
;; four elements of the V16QI result are a vec_merge, under the QImode mask
;; register operand 3, of the truncated source and elements 0..3 of
;; operand 2 ("0C"); the remaining twelve elements are constant zero.
9208 | (define_insn "avx512vl_<code><mode>v4qi2_mask" | |
9209 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
9210 | (vec_concat:V16QI | |
9211 | (vec_merge:V4QI | |
9212 | (any_truncate:V4QI | |
9213 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
9214 | (vec_select:V4QI | |
9215 | (match_operand:V16QI 2 "vector_move_operand" "0C") | |
9216 | (parallel [(const_int 0) (const_int 1) | |
9217 | (const_int 2) (const_int 3)])) | |
9218 | (match_operand:QI 3 "register_operand" "Yk")) | |
9219 | (const_vector:V12QI [(const_int 0) (const_int 0) | |
9220 | (const_int 0) (const_int 0) | |
9221 | (const_int 0) (const_int 0) | |
9222 | (const_int 0) (const_int 0) | |
9223 | (const_int 0) (const_int 0) | |
9224 | (const_int 0) (const_int 0)])))] | |
9225 | "TARGET_AVX512VL" | |
9226 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9227 | [(set_attr "type" "ssemov") | |
9228 | (set_attr "prefix" "evex") | |
9229 | (set_attr "mode" "TI")]) | |
9230 | ||
;; Masked store form of the four-element truncation to V4QI.  The low four
;; elements written are a vec_merge, under mask register operand 2, of the
;; truncated source and the destination's own elements 0..3; elements 4..15
;; are selected back from the destination (match_dup 0) and so are
;; unchanged.  In the Intel dialect the memory operand is printed with the
;; %k size modifier so its size is the four bytes that can be written
;; rather than the 16 bytes implied by the operand's V16QI mode.
9231 | (define_insn "avx512vl_<code><mode>v4qi2_mask_store" | |
9232 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
9233 | (vec_concat:V16QI | |
9234 | (vec_merge:V4QI | |
9235 | (any_truncate:V4QI | |
9236 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
9237 | (vec_select:V4QI | |
9238 | (match_dup 0) | |
9239 | (parallel [(const_int 0) (const_int 1) | |
9240 | (const_int 2) (const_int 3)])) | |
9241 | (match_operand:QI 2 "register_operand" "Yk")) | |
9242 | (vec_select:V12QI | |
9243 | (match_dup 0) | |
9244 | (parallel [(const_int 4) (const_int 5) | |
9245 | (const_int 6) (const_int 7) | |
9246 | (const_int 8) (const_int 9) | |
9247 | (const_int 10) (const_int 11) | |
9248 | (const_int 12) (const_int 13) | |
9249 | (const_int 14) (const_int 15)]))))] | |
9250 | "TARGET_AVX512VL" | |
9251 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}" | |
9252 | [(set_attr "type" "ssemov") | |
9253 | (set_attr "memory" "store") | |
9254 | (set_attr "prefix" "evex") | |
9255 | (set_attr "mode" "TI")]) | |
9256 | ||
;; Eight-element source modes for the truncations to V8QI below: V8HI
;; (only when TARGET_AVX512BW holds) and V8SI.
9257 | (define_mode_iterator VI2_128_BW_4_256 | |
9258 | [(V8HI "TARGET_AVX512BW") V8SI]) | |
9259 | ||
;; Store form of the eight-element truncation to V8QI, for the source modes
;; of VI2_128_BW_4_256.  Elements 8..15 of the V16QI memory destination are
;; selected back from the destination itself (match_dup 0), so only the low
;; eight bytes change.  In the Intel dialect the memory operand is printed
;; with the %q size modifier so its size is those eight bytes rather than
;; the 16 bytes implied by the operand's V16QI mode.
9260 | (define_insn "*avx512vl_<code><mode>v8qi2_store" | |
9261 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
9262 | (vec_concat:V16QI | |
9263 | (any_truncate:V8QI | |
9264 | (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) | |
9265 | (vec_select:V8QI | |
9266 | (match_dup 0) | |
9267 | (parallel [(const_int 8) (const_int 9) | |
9268 | (const_int 10) (const_int 11) | |
9269 | (const_int 12) (const_int 13) | |
9270 | (const_int 14) (const_int 15)]))))] | |
9271 | "TARGET_AVX512VL" | |
9272 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}" | |
9273 | [(set_attr "type" "ssemov") | |
9274 | (set_attr "memory" "store") | |
9275 | (set_attr "prefix" "evex") | |
9276 | (set_attr "mode" "TI")]) | |
9277 | ||
;; Masked register form of the eight-element truncation to V8QI.  The low
;; eight elements of the V16QI result are a vec_merge, under the QImode
;; mask register operand 3, of the truncated source and elements 0..7 of
;; operand 2 ("0C"); the upper eight elements are constant zero.
9278 | (define_insn "avx512vl_<code><mode>v8qi2_mask" | |
9279 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
9280 | (vec_concat:V16QI | |
9281 | (vec_merge:V8QI | |
9282 | (any_truncate:V8QI | |
9283 | (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) | |
9284 | (vec_select:V8QI | |
9285 | (match_operand:V16QI 2 "vector_move_operand" "0C") | |
9286 | (parallel [(const_int 0) (const_int 1) | |
9287 | (const_int 2) (const_int 3) | |
9288 | (const_int 4) (const_int 5) | |
9289 | (const_int 6) (const_int 7)])) | |
9290 | (match_operand:QI 3 "register_operand" "Yk")) | |
9291 | (const_vector:V8QI [(const_int 0) (const_int 0) | |
9292 | (const_int 0) (const_int 0) | |
9293 | (const_int 0) (const_int 0) | |
9294 | (const_int 0) (const_int 0)])))] | |
9295 | "TARGET_AVX512VL" | |
9296 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9297 | [(set_attr "type" "ssemov") | |
9298 | (set_attr "prefix" "evex") | |
9299 | (set_attr "mode" "TI")]) | |
9300 | ||
;; Masked store form of the eight-element truncation to V8QI.  The low
;; eight elements written are a vec_merge, under mask register operand 2,
;; of the truncated source and the destination's own elements 0..7;
;; elements 8..15 are selected back from the destination (match_dup 0) and
;; so are unchanged.  In the Intel dialect the memory operand is printed
;; with the %q size modifier so its size is the eight bytes that can be
;; written rather than the 16 bytes implied by the operand's V16QI mode.
9301 | (define_insn "avx512vl_<code><mode>v8qi2_mask_store" | |
9302 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
9303 | (vec_concat:V16QI | |
9304 | (vec_merge:V8QI | |
9305 | (any_truncate:V8QI | |
9306 | (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) | |
9307 | (vec_select:V8QI | |
9308 | (match_dup 0) | |
9309 | (parallel [(const_int 0) (const_int 1) | |
9310 | (const_int 2) (const_int 3) | |
9311 | (const_int 4) (const_int 5) | |
9312 | (const_int 6) (const_int 7)])) | |
9313 | (match_operand:QI 2 "register_operand" "Yk")) | |
9314 | (vec_select:V8QI | |
9315 | (match_dup 0) | |
9316 | (parallel [(const_int 8) (const_int 9) | |
9317 | (const_int 10) (const_int 11) | |
9318 | (const_int 12) (const_int 13) | |
9319 | (const_int 14) (const_int 15)]))))] | |
9320 | "TARGET_AVX512VL" | |
9321 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}" | |
9322 | [(set_attr "type" "ssemov") | |
9323 | (set_attr "memory" "store") | |
9324 | (set_attr "prefix" "evex") | |
9325 | (set_attr "mode" "TI")]) | |
9326 | ||
;; Source modes of the truncations to HImode elements whose result occupies
;; only the low part of a V8HI register.  Per source mode: pmov_dst_4 is
;; the mode of the truncated part, pmov_dst_zeroed_4 the mode of the
;; remaining elements of the V8HI result, and pmov_suff_4 the vpmov size
;; suffix.
9327 | (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI]) | |
9328 | (define_mode_attr pmov_dst_4 | |
9329 | [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")]) | |
9330 | (define_mode_attr pmov_dst_zeroed_4 | |
9331 | [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")]) | |
9332 | (define_mode_attr pmov_suff_4 | |
9333 | [(V4DI "qw") (V2DI "qw") (V4SI "dw")]) | |
9334 | ||
;; Register-to-register truncation to HImode elements.  The V8HI result is
;; the concatenation of the truncated source (<pmov_dst_4>) with operand 2,
;; which must satisfy const0_operand, i.e. the remaining elements are zero.
9335 | (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2" | |
9336 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
9337 | (vec_concat:V8HI | |
9338 | (any_truncate:<pmov_dst_4> | |
9339 | (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v")) | |
9340 | (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))] | |
9341 | "TARGET_AVX512VL" | |
9342 | "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}" | |
9343 | [(set_attr "type" "ssemov") | |
9344 | (set_attr "prefix" "evex") | |
9345 | (set_attr "mode" "TI")]) | |
9346 | ||
;; Store form of the four-element truncation to V4HI, for the source modes
;; of VI4_128_8_256.  Elements 4..7 of the V8HI memory destination are
;; selected back from the destination itself (match_dup 0), so only the low
;; eight bytes change.  In the Intel dialect the memory operand is printed
;; with the %q size modifier so its size is those eight bytes rather than
;; the 16 bytes implied by the operand's V8HI mode.
9347 | (define_insn "*avx512vl_<code><mode>v4hi2_store" | |
9348 | [(set (match_operand:V8HI 0 "memory_operand" "=m") | |
9349 | (vec_concat:V8HI | |
9350 | (any_truncate:V4HI | |
9351 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
9352 | (vec_select:V4HI | |
9353 | (match_dup 0) | |
9354 | (parallel [(const_int 4) (const_int 5) | |
9355 | (const_int 6) (const_int 7)]))))] | |
9356 | "TARGET_AVX512VL" | |
9357 | "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%q0, %1}" | |
9358 | [(set_attr "type" "ssemov") | |
9359 | (set_attr "memory" "store") | |
9360 | (set_attr "prefix" "evex") | |
9361 | (set_attr "mode" "TI")]) | |
9362 | ||
;; Masked register form of the four-element truncation to V4HI.  The low
;; four elements of the V8HI result are a vec_merge, under the QImode mask
;; register operand 3, of the truncated source and elements 0..3 of
;; operand 2 ("0C"); the upper four elements are constant zero.
9363 | (define_insn "avx512vl_<code><mode>v4hi2_mask" | |
9364 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
9365 | (vec_concat:V8HI | |
9366 | (vec_merge:V4HI | |
9367 | (any_truncate:V4HI | |
9368 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
9369 | (vec_select:V4HI | |
9370 | (match_operand:V8HI 2 "vector_move_operand" "0C") | |
9371 | (parallel [(const_int 0) (const_int 1) | |
9372 | (const_int 2) (const_int 3)])) | |
9373 | (match_operand:QI 3 "register_operand" "Yk")) | |
9374 | (const_vector:V4HI [(const_int 0) (const_int 0) | |
9375 | (const_int 0) (const_int 0)])))] | |
9376 | "TARGET_AVX512VL" | |
9377 | "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9378 | [(set_attr "type" "ssemov") | |
9379 | (set_attr "prefix" "evex") | |
9380 | (set_attr "mode" "TI")]) | |
9381 | ||
;; Masked store form of the four-element truncation to V4HI.  The low four
;; elements written are a vec_merge, under mask register operand 2, of the
;; truncated source and the destination's own elements 0..3; elements 4..7
;; are selected back from the destination (match_dup 0) and so are
;; unchanged.  In the Intel dialect the memory operand is printed with the
;; %q size modifier so its size is the eight bytes that can be written
;; rather than the 16 bytes implied by the operand's V8HI mode.
9382 | (define_insn "avx512vl_<code><mode>v4hi2_mask_store" | |
9383 | [(set (match_operand:V8HI 0 "memory_operand" "=m") | |
9384 | (vec_concat:V8HI | |
9385 | (vec_merge:V4HI | |
9386 | (any_truncate:V4HI | |
9387 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
9388 | (vec_select:V4HI | |
9389 | (match_dup 0) | |
9390 | (parallel [(const_int 0) (const_int 1) | |
9391 | (const_int 2) (const_int 3)])) | |
9392 | (match_operand:QI 2 "register_operand" "Yk")) | |
9393 | (vec_select:V4HI | |
9394 | (match_dup 0) | |
9395 | (parallel [(const_int 4) (const_int 5) | |
9396 | (const_int 6) (const_int 7)]))))] | |
9397 | "TARGET_AVX512VL" | |
9398 | "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%q0%{%2%}, %1}" | |
9399 | [(set_attr "type" "ssemov") | |
9400 | (set_attr "memory" "store") | |
9401 | (set_attr "prefix" "evex") | |
9402 | (set_attr "mode" "TI")]) | |
9403 | ||
;; Store form of the V2DI -> V2HI truncation.  Elements 2..7 of the V8HI
;; memory destination are selected back from the destination itself
;; (match_dup 0), so only the low four bytes change.  In the Intel dialect
;; the memory operand is printed with the %k size modifier so its size is
;; those four bytes rather than the 16 bytes implied by the operand's V8HI
;; mode.
9404 | (define_insn "*avx512vl_<code>v2div2hi2_store" | |
9405 | [(set (match_operand:V8HI 0 "memory_operand" "=m") | |
9406 | (vec_concat:V8HI | |
9407 | (any_truncate:V2HI | |
9408 | (match_operand:V2DI 1 "register_operand" "v")) | |
9409 | (vec_select:V6HI | |
9410 | (match_dup 0) | |
9411 | (parallel [(const_int 2) (const_int 3) | |
9412 | (const_int 4) (const_int 5) | |
9413 | (const_int 6) (const_int 7)]))))] | |
9414 | "TARGET_AVX512VL" | |
9415 | "vpmov<trunsuffix>qw\t{%1, %0|%k0, %1}" | |
9416 | [(set_attr "type" "ssemov") | |
9417 | (set_attr "memory" "store") | |
9418 | (set_attr "prefix" "evex") | |
9419 | (set_attr "mode" "TI")]) | |
9420 | ||
;; Masked register form of the V2DI -> V2HI truncation.  The low two
;; elements of the V8HI result are a vec_merge, under the QImode mask
;; register operand 3, of the truncated source and elements 0..1 of
;; operand 2 ("0C"); the remaining six elements are constant zero.
9421 | (define_insn "avx512vl_<code>v2div2hi2_mask" | |
9422 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
9423 | (vec_concat:V8HI | |
9424 | (vec_merge:V2HI | |
9425 | (any_truncate:V2HI | |
9426 | (match_operand:V2DI 1 "register_operand" "v")) | |
9427 | (vec_select:V2HI | |
9428 | (match_operand:V8HI 2 "vector_move_operand" "0C") | |
9429 | (parallel [(const_int 0) (const_int 1)])) | |
9430 | (match_operand:QI 3 "register_operand" "Yk")) | |
9431 | (const_vector:V6HI [(const_int 0) (const_int 0) | |
9432 | (const_int 0) (const_int 0) | |
9433 | (const_int 0) (const_int 0)])))] | |
9434 | "TARGET_AVX512VL" | |
9435 | "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9436 | [(set_attr "type" "ssemov") | |
9437 | (set_attr "prefix" "evex") | |
9438 | (set_attr "mode" "TI")]) | |
9439 | ||
;; Masked store form of the V2DI -> V2HI truncation.  The low two elements
;; written are a vec_merge, under mask register operand 2, of the truncated
;; source and the destination's own elements 0..1; elements 2..7 are
;; selected back from the destination (match_dup 0) and so are unchanged.
;; In the Intel dialect the memory operand is printed with the %k size
;; modifier so its size is the four bytes that can be written rather than
;; the 16 bytes implied by the operand's V8HI mode.
9440 | (define_insn "avx512vl_<code>v2div2hi2_mask_store" | |
9441 | [(set (match_operand:V8HI 0 "memory_operand" "=m") | |
9442 | (vec_concat:V8HI | |
9443 | (vec_merge:V2HI | |
9444 | (any_truncate:V2HI | |
9445 | (match_operand:V2DI 1 "register_operand" "v")) | |
9446 | (vec_select:V2HI | |
9447 | (match_dup 0) | |
9448 | (parallel [(const_int 0) (const_int 1)])) | |
9449 | (match_operand:QI 2 "register_operand" "Yk")) | |
9450 | (vec_select:V6HI | |
9451 | (match_dup 0) | |
9452 | (parallel [(const_int 2) (const_int 3) | |
9453 | (const_int 4) (const_int 5) | |
9454 | (const_int 6) (const_int 7)]))))] | |
9455 | "TARGET_AVX512VL" | |
9456 | "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%k0%{%2%}, %1}" | |
9457 | [(set_attr "type" "ssemov") | |
9458 | (set_attr "memory" "store") | |
9459 | (set_attr "prefix" "evex") | |
9460 | (set_attr "mode" "TI")]) | |
9461 | ||
;; Register-to-register V2DI -> V2SI truncation.  The V4SI result is the
;; concatenation of the truncated source with operand 2, which must
;; satisfy const0_operand, i.e. the upper two elements are zero.
9462 | (define_insn "*avx512vl_<code>v2div2si2" | |
9463 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
9464 | (vec_concat:V4SI | |
9465 | (any_truncate:V2SI | |
9466 | (match_operand:V2DI 1 "register_operand" "v")) | |
9467 | (match_operand:V2SI 2 "const0_operand")))] | |
9468 | "TARGET_AVX512VL" | |
9469 | "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}" | |
9470 | [(set_attr "type" "ssemov") | |
9471 | (set_attr "prefix" "evex") | |
9472 | (set_attr "mode" "TI")]) | |
9473 | ||
;; Store form of the V2DI -> V2SI truncation.  Elements 2..3 of the V4SI
;; memory destination are selected back from the destination itself
;; (match_dup 0), so only the low eight bytes change.  In the Intel dialect
;; the memory operand is printed with the %q size modifier so its size is
;; those eight bytes rather than the 16 bytes implied by the operand's V4SI
;; mode.
9474 | (define_insn "*avx512vl_<code>v2div2si2_store" | |
9475 | [(set (match_operand:V4SI 0 "memory_operand" "=m") | |
9476 | (vec_concat:V4SI | |
9477 | (any_truncate:V2SI | |
9478 | (match_operand:V2DI 1 "register_operand" "v")) | |
9479 | (vec_select:V2SI | |
9480 | (match_dup 0) | |
9481 | (parallel [(const_int 2) (const_int 3)]))))] | |
9482 | "TARGET_AVX512VL" | |
9483 | "vpmov<trunsuffix>qd\t{%1, %0|%q0, %1}" | |
9484 | [(set_attr "type" "ssemov") | |
9485 | (set_attr "memory" "store") | |
9486 | (set_attr "prefix" "evex") | |
9487 | (set_attr "mode" "TI")]) | |
9488 | ||
;; Masked register form of the V2DI -> V2SI truncation.  The low two
;; elements of the V4SI result are a vec_merge, under the QImode mask
;; register operand 3, of the truncated source and elements 0..1 of
;; operand 2 ("0C"); the upper two elements are constant zero.
9489 | (define_insn "avx512vl_<code>v2div2si2_mask" | |
9490 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
9491 | (vec_concat:V4SI | |
9492 | (vec_merge:V2SI | |
9493 | (any_truncate:V2SI | |
9494 | (match_operand:V2DI 1 "register_operand" "v")) | |
9495 | (vec_select:V2SI | |
9496 | (match_operand:V4SI 2 "vector_move_operand" "0C") | |
9497 | (parallel [(const_int 0) (const_int 1)])) | |
9498 | (match_operand:QI 3 "register_operand" "Yk")) | |
9499 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] | |
9500 | "TARGET_AVX512VL" | |
9501 | "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9502 | [(set_attr "type" "ssemov") | |
9503 | (set_attr "prefix" "evex") | |
9504 | (set_attr "mode" "TI")]) | |
9505 | ||
;; Masked store form of the V2DI -> V2SI truncation.  The low two elements
;; written are a vec_merge, under mask register operand 2, of the truncated
;; source and the destination's own elements 0..1; elements 2..3 are
;; selected back from the destination (match_dup 0) and so are unchanged.
;; In the Intel dialect the memory operand is printed with the %q size
;; modifier so its size is the eight bytes that can be written rather than
;; the 16 bytes implied by the operand's V4SI mode.
9506 | (define_insn "avx512vl_<code>v2div2si2_mask_store" | |
9507 | [(set (match_operand:V4SI 0 "memory_operand" "=m") | |
9508 | (vec_concat:V4SI | |
9509 | (vec_merge:V2SI | |
9510 | (any_truncate:V2SI | |
9511 | (match_operand:V2DI 1 "register_operand" "v")) | |
9512 | (vec_select:V2SI | |
9513 | (match_dup 0) | |
9514 | (parallel [(const_int 0) (const_int 1)])) | |
9515 | (match_operand:QI 2 "register_operand" "Yk")) | |
9516 | (vec_select:V2SI | |
9517 | (match_dup 0) | |
9518 | (parallel [(const_int 2) (const_int 3)]))))] | |
9519 | "TARGET_AVX512VL" | |
9520 | "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%q0%{%2%}, %1}" | |
9521 | [(set_attr "type" "ssemov") | |
9522 | (set_attr "memory" "store") | |
9523 | (set_attr "prefix" "evex") | |
9524 | (set_attr "mode" "TI")]) | |
9525 | ||
;; Register-to-register V8DI -> V8QI truncation, enabled by TARGET_AVX512F.
;; The V16QI result is the truncated source in the low eight elements
;; concatenated with an explicit all-zero V8QI const_vector.
697a43f8 | 9526 | (define_insn "*avx512f_<code>v8div16qi2" |
9527 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
9528 | (vec_concat:V16QI | |
9529 | (any_truncate:V8QI | |
9530 | (match_operand:V8DI 1 "register_operand" "v")) | |
9531 | (const_vector:V8QI [(const_int 0) (const_int 0) | |
9532 | (const_int 0) (const_int 0) | |
9533 | (const_int 0) (const_int 0) | |
9534 | (const_int 0) (const_int 0)])))] | |
9535 | "TARGET_AVX512F" | |
9536 | "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}" | |
9537 | [(set_attr "type" "ssemov") | |
9538 | (set_attr "prefix" "evex") | |
9539 | (set_attr "mode" "TI")]) | |
9540 | ||
;; Store form of the V8DI -> V8QI truncation.  Elements 8..15 of the V16QI
;; memory destination are selected back from the destination itself
;; (match_dup 0), so only the low eight bytes change.  In the Intel dialect
;; the memory operand is printed with the %q size modifier so its size is
;; those eight bytes rather than the 16 bytes implied by the operand's
;; V16QI mode.
9541 | (define_insn "*avx512f_<code>v8div16qi2_store" | |
9542 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
9543 | (vec_concat:V16QI | |
9544 | (any_truncate:V8QI | |
9545 | (match_operand:V8DI 1 "register_operand" "v")) | |
9546 | (vec_select:V8QI | |
9547 | (match_dup 0) | |
9548 | (parallel [(const_int 8) (const_int 9) | |
9549 | (const_int 10) (const_int 11) | |
9550 | (const_int 12) (const_int 13) | |
9551 | (const_int 14) (const_int 15)]))))] | |
9552 | "TARGET_AVX512F" | |
9553 | "vpmov<trunsuffix>qb\t{%1, %0|%q0, %1}" | |
9554 | [(set_attr "type" "ssemov") | |
9555 | (set_attr "memory" "store") | |
9556 | (set_attr "prefix" "evex") | |
9557 | (set_attr "mode" "TI")]) | |
9558 | ||
;; Masked register form of the V8DI -> V8QI truncation.  The low eight
;; elements of the V16QI result are a vec_merge, under the QImode mask
;; register operand 3, of the truncated source and elements 0..7 of
;; operand 2 ("0C"); the upper eight elements are constant zero.
5220cab6 | 9559 | (define_insn "avx512f_<code>v8div16qi2_mask" |
9560 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
9561 | (vec_concat:V16QI | |
9562 | (vec_merge:V8QI | |
9563 | (any_truncate:V8QI | |
9564 | (match_operand:V8DI 1 "register_operand" "v")) | |
9565 | (vec_select:V8QI | |
9566 | (match_operand:V16QI 2 "vector_move_operand" "0C") | |
9567 | (parallel [(const_int 0) (const_int 1) | |
9568 | (const_int 2) (const_int 3) | |
9569 | (const_int 4) (const_int 5) | |
9570 | (const_int 6) (const_int 7)])) | |
a31e7f46 | 9571 | (match_operand:QI 3 "register_operand" "Yk")) |
5220cab6 | 9572 | (const_vector:V8QI [(const_int 0) (const_int 0) |
9573 | (const_int 0) (const_int 0) | |
9574 | (const_int 0) (const_int 0) | |
9575 | (const_int 0) (const_int 0)])))] | |
9576 | "TARGET_AVX512F" | |
9577 | "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9578 | [(set_attr "type" "ssemov") | |
9579 | (set_attr "prefix" "evex") | |
9580 | (set_attr "mode" "TI")]) | |
9581 | ||
;; Masked store form of the V8DI -> V8QI truncation.  The low eight
;; elements written are a vec_merge, under mask register operand 2, of the
;; truncated source and the destination's own elements 0..7; elements 8..15
;; are selected back from the destination (match_dup 0) and so are
;; unchanged.  In the Intel dialect the memory operand is printed with the
;; %q size modifier so its size is the eight bytes that can be written
;; rather than the 16 bytes implied by the operand's V16QI mode.
f4a19f2a | 9582 | (define_insn "avx512f_<code>v8div16qi2_mask_store" |
5220cab6 | 9583 | [(set (match_operand:V16QI 0 "memory_operand" "=m") |
9584 | (vec_concat:V16QI | |
9585 | (vec_merge:V8QI | |
9586 | (any_truncate:V8QI | |
9587 | (match_operand:V8DI 1 "register_operand" "v")) | |
9588 | (vec_select:V8QI | |
9589 | (match_dup 0) | |
9590 | (parallel [(const_int 0) (const_int 1) | |
9591 | (const_int 2) (const_int 3) | |
9592 | (const_int 4) (const_int 5) | |
9593 | (const_int 6) (const_int 7)])) | |
a31e7f46 | 9594 | (match_operand:QI 2 "register_operand" "Yk")) |
5220cab6 | 9595 | (vec_select:V8QI |
9596 | (match_dup 0) | |
9597 | (parallel [(const_int 8) (const_int 9) | |
9598 | (const_int 10) (const_int 11) | |
9599 | (const_int 12) (const_int 13) | |
9600 | (const_int 14) (const_int 15)]))))] | |
9601 | "TARGET_AVX512F" | |
9602 | "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}" | |
9603 | [(set_attr "type" "ssemov") | |
9604 | (set_attr "memory" "store") | |
9605 | (set_attr "prefix" "evex") | |
9606 | (set_attr "mode" "TI")]) | |
9607 | ||
5802c0cb | 9608 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
9609 | ;; | |
9610 | ;; Parallel integral arithmetic | |
9611 | ;; | |
9612 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
9613 | ||
;; Expand vector integer negation as a subtraction from zero.  The
;; preparation statement sets operand 2 to a register holding
;; CONST0_RTX of the vector mode, and the pattern then emits
;; (minus operand2 operand1).
9614 | (define_expand "neg<mode>2" | |
abd4f58b | 9615 | [(set (match_operand:VI_AVX2 0 "register_operand") |
d2c249f5 | 9616 | (minus:VI_AVX2 |
5802c0cb | 9617 | (match_dup 2) |
abd4f58b | 9618 | (match_operand:VI_AVX2 1 "nonimmediate_operand")))] |
5802c0cb | 9619 | "TARGET_SSE2" |
9620 | "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") | |
9621 | ||
;; Named expander for vector integer add/subtract over the VI_AVX2 modes
;; (one expander per code of the plusminus iterator).  Before the pattern
;; is emitted the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.
12803fe0 | 9622 | (define_expand "<plusminus_insn><mode>3" |
abd4f58b | 9623 | [(set (match_operand:VI_AVX2 0 "register_operand") |
c4530783 | 9624 | (plusminus:VI_AVX2 |
abd4f58b | 9625 | (match_operand:VI_AVX2 1 "nonimmediate_operand") |
9626 | (match_operand:VI_AVX2 2 "nonimmediate_operand")))] | |
12803fe0 | 9627 | "TARGET_SSE2" |
9628 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | |
9629 | ||
;; Named expander for masked add/subtract over the VI48_AVX512VL modes,
;; enabled by TARGET_AVX512F.  The result is a vec_merge of the arithmetic
;; result and operand 3 under mask operand 4; the operands are first passed
;; through ix86_fixup_binary_operands_no_copy.
9630 | (define_expand "<plusminus_insn><mode>3_mask" | |
9631 | [(set (match_operand:VI48_AVX512VL 0 "register_operand") | |
9632 | (vec_merge:VI48_AVX512VL | |
9633 | (plusminus:VI48_AVX512VL | |
9634 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand") | |
9635 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")) | |
9636 | (match_operand:VI48_AVX512VL 3 "vector_move_operand") | |
9637 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
9638 | "TARGET_AVX512F" | |
9639 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | |
9640 | ||
;; Named expander for masked add/subtract over the VI12_AVX512VL modes,
;; enabled by TARGET_AVX512BW.  The result is a vec_merge of the arithmetic
;; result and operand 3 under mask operand 4; the operands are first passed
;; through ix86_fixup_binary_operands_no_copy.
9641 | (define_expand "<plusminus_insn><mode>3_mask" | |
9642 | [(set (match_operand:VI12_AVX512VL 0 "register_operand") | |
9643 | (vec_merge:VI12_AVX512VL | |
9644 | (plusminus:VI12_AVX512VL | |
9645 | (match_operand:VI12_AVX512VL 1 "nonimmediate_operand") | |
9646 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")) | |
9647 | (match_operand:VI12_AVX512VL 3 "vector_move_operand") | |
9648 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
9649 | "TARGET_AVX512BW" | |
801ff5b2 | 9650 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
5802c0cb | 9651 | |
;; Vector integer add/subtract over the VI_AVX2 modes; the condition also
;; requires ix86_binary_operator_ok.  Alternative 0 (noavx) is the
;; two-operand form with operand 1 tied to the destination and
;; prefix_data16 set; alternative 1 (avx) is the three-operand vp... form.
;; The template and the "prefix" attribute use the <mask_operand3> and
;; <mask_prefix3> substitution attributes, which are defined outside this
;; part of the file.
12803fe0 | 9652 | (define_insn "*<plusminus_insn><mode>3" |
e13e1b39 | 9653 | [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v") |
c4530783 | 9654 | (plusminus:VI_AVX2 |
e13e1b39 | 9655 | (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v") |
9656 | (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))] | |
12803fe0 | 9657 | "TARGET_SSE2 |
9658 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
d8f82f6b | 9659 | "@ |
63d5e521 | 9660 | p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} |
5220cab6 | 9661 | vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
d8f82f6b | 9662 | [(set_attr "isa" "noavx,avx") |
9663 | (set_attr "type" "sseiadd") | |
9664 | (set_attr "prefix_data16" "1,*") | |
5220cab6 | 9665 | (set_attr "prefix" "<mask_prefix3>") |
5deb404d | 9666 | (set_attr "mode" "<sseinsnmode>")]) |
5802c0cb | 9667 | |
;; Masked add/subtract over the VI48_AVX512VL modes (TARGET_AVX512F and
;; ix86_binary_operator_ok).  The result is a vec_merge of the arithmetic
;; result and operand 3 ("0C") under mask register operand 4 ("Yk"); the
;; template prints the mask as {%4} followed by the %N3 modifier.
12803fe0 | 9668 | (define_insn "*<plusminus_insn><mode>3_mask" |
9669 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
9670 | (vec_merge:VI48_AVX512VL | |
9671 | (plusminus:VI48_AVX512VL | |
9672 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v") | |
9673 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")) | |
9674 | (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C") | |
9675 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
9676 | "TARGET_AVX512F | |
9677 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
9678 | "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
9679 | [(set_attr "type" "sseiadd") | |
9680 | (set_attr "prefix" "evex") | |
9681 | (set_attr "mode" "<sseinsnmode>")]) | |
9682 | ||
;; Masked integer vector add/subtract on the VI12_AVX512VL modes.
;; Same shape as the VI48_AVX512VL pattern of the same name, but gated on
;; TARGET_AVX512BW instead of TARGET_AVX512F.
9683 | (define_insn "*<plusminus_insn><mode>3_mask" | |
9684 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
9685 | (vec_merge:VI12_AVX512VL | |
9686 | (plusminus:VI12_AVX512VL | |
9687 | (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v") | |
9688 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")) | |
9689 | (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C") | |
9690 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
9691 | "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
9692 | "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
9693 | [(set_attr "type" "sseiadd") | |
9694 | (set_attr "prefix" "evex") | |
9695 | (set_attr "mode" "<sseinsnmode>")]) | |
9696 | ||
;; Expander for the sat_plusminus code iterator (presumably saturating
;; add/subtract -- confirm against the iterator definition) on the
;; VI12_AVX2 modes, with an optional mask variant via <mask_name>.
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
293fd15f | 9697 | (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>" |
abd4f58b | 9698 | [(set (match_operand:VI12_AVX2 0 "register_operand") |
5deb404d | 9699 | (sat_plusminus:VI12_AVX2 |
abd4f58b | 9700 | (match_operand:VI12_AVX2 1 "nonimmediate_operand") |
9701 | (match_operand:VI12_AVX2 2 "nonimmediate_operand")))] | |
293fd15f | 9702 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
7c839b3f | 9703 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
9704 | ||
;; Matching insn for the sat_plusminus expander on the VI12_AVX2 modes.
;; Alternative 0 is the two-operand legacy form, alternative 1 the
;; three-operand "v"-prefixed form which may carry a mask decoration.
;; NOTE(review): "mode" is fixed to TI although the iterator name suggests
;; wider modes too, while neighbouring patterns use <sseinsnmode> -- confirm.
293fd15f | 9705 | (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>" |
e13e1b39 | 9706 | [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v") |
5deb404d | 9707 | (sat_plusminus:VI12_AVX2 |
e13e1b39 | 9708 | (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v") |
9709 | (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))] | |
293fd15f | 9710 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition> |
9711 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
d8f82f6b | 9712 | "@ |
63d5e521 | 9713 | p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} |
293fd15f | 9714 | vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
d8f82f6b | 9715 | [(set_attr "isa" "noavx,avx") |
9716 | (set_attr "type" "sseiadd") | |
9717 | (set_attr "prefix_data16" "1,*") | |
293fd15f | 9718 | (set_attr "prefix" "orig,maybe_evex") |
5802c0cb | 9719 | (set_attr "mode" "TI")]) |
9720 | ||
;; Multiplication expander for the VI1_AVX512 modes.  The RTL template is
;; never emitted: the preparation code calls ix86_expand_vecop_qihi with
;; MULT and the three operands, then finishes with DONE.
05bea2df | 9721 | (define_expand "mul<mode>3<mask_name>" |
201f262d | 9722 | [(set (match_operand:VI1_AVX512 0 "register_operand") |
9723 | (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand") | |
9724 | (match_operand:VI1_AVX512 2 "register_operand")))] | |
05bea2df | 9725 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
4b26818b | 9726 | { |
b1b4d742 | 9727 | ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]); |
4b26818b | 9728 | DONE; |
9729 | }) | |
9730 | ||
;; Multiplication expander for the VI2_AVX2 modes (optionally masked).
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
2d71b728 | 9731 | (define_expand "mul<mode>3<mask_name>" |
abd4f58b | 9732 | [(set (match_operand:VI2_AVX2 0 "register_operand") |
9733 | (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand") | |
9734 | (match_operand:VI2_AVX2 2 "nonimmediate_operand")))] | |
2d71b728 | 9735 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
5deb404d | 9736 | "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") |
5802c0cb | 9737 | |
;; Multiplication insn for the VI2_AVX2 modes, emitted as pmullw (legacy
;; two-operand form) or vpmullw (three-operand form, optionally with a
;; mask decoration).  Operand 1 is marked commutative with "%".
2d71b728 | 9738 | (define_insn "*mul<mode>3<mask_name>" |
9739 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") | |
9740 | (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v") | |
9741 | (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))] | |
9742 | "TARGET_SSE2 | |
9743 | && ix86_binary_operator_ok (MULT, <MODE>mode, operands) | |
9744 | && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
d8f82f6b | 9745 | "@ |
9746 | pmullw\t{%2, %0|%0, %2} | |
2d71b728 | 9747 | vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
d8f82f6b | 9748 | [(set_attr "isa" "noavx,avx") |
9749 | (set_attr "type" "sseimul") | |
9750 | (set_attr "prefix_data16" "1,*") | |
9751 | (set_attr "prefix" "orig,vex") | |
5deb404d | 9752 | (set_attr "mode" "<sseinsnmode>")]) |
5802c0cb | 9753 | |
;; High-part multiply expander for the VI2_AVX2 modes: both inputs are
;; extended (any_extend) to <ssedoublemode>, multiplied, shifted right by
;; 16 and truncated back to the element mode.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands_no_copy.
2d71b728 | 9754 | (define_expand "<s>mul<mode>3_highpart<mask_name>" |
abd4f58b | 9755 | [(set (match_operand:VI2_AVX2 0 "register_operand") |
5deb404d | 9756 | (truncate:VI2_AVX2 |
9757 | (lshiftrt:<ssedoublemode> | |
9758 | (mult:<ssedoublemode> | |
9759 | (any_extend:<ssedoublemode> | |
abd4f58b | 9760 | (match_operand:VI2_AVX2 1 "nonimmediate_operand")) |
5deb404d | 9761 | (any_extend:<ssedoublemode> |
abd4f58b | 9762 | (match_operand:VI2_AVX2 2 "nonimmediate_operand"))) |
5deb404d | 9763 | (const_int 16))))] |
2d71b728 | 9764 | "TARGET_SSE2 |
9765 | && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
1004a2c1 | 9766 | "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") |
c6c91d61 | 9767 | |
;; High-part multiply insn for the VI2_AVX2 modes, emitted as pmulh<u>w
;; (legacy two-operand form) or vpmulh<u>w (three-operand form, optionally
;; with a mask decoration).  The RTL mirrors the expander of the same name.
2d71b728 | 9768 | (define_insn "*<s>mul<mode>3_highpart<mask_name>" |
9769 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") | |
5deb404d | 9770 | (truncate:VI2_AVX2 |
9771 | (lshiftrt:<ssedoublemode> | |
9772 | (mult:<ssedoublemode> | |
9773 | (any_extend:<ssedoublemode> | |
2d71b728 | 9774 | (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")) |
5deb404d | 9775 | (any_extend:<ssedoublemode> |
2d71b728 | 9776 | (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm"))) |
5802c0cb | 9777 | (const_int 16))))] |
2d71b728 | 9778 | "TARGET_SSE2 |
9779 | && ix86_binary_operator_ok (MULT, <MODE>mode, operands) | |
9780 | && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
d8f82f6b | 9781 | "@ |
9782 | pmulh<u>w\t{%2, %0|%0, %2} | |
2d71b728 | 9783 | vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
d8f82f6b | 9784 | [(set_attr "isa" "noavx,avx") |
9785 | (set_attr "type" "sseimul") | |
9786 | (set_attr "prefix_data16" "1,*") | |
9787 | (set_attr "prefix" "orig,vex") | |
5deb404d | 9788 | (set_attr "mode" "<sseinsnmode>")]) |
9789 | ||
;; Expander for the unsigned widening multiply of the even-indexed SImode
;; elements (0, 2, ..., 14) of two V16SI operands into a V8DI result.
;; Enabled under TARGET_AVX512F; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy.
5220cab6 | 9790 | (define_expand "vec_widen_umult_even_v16si<mask_name>" |
697a43f8 | 9791 | [(set (match_operand:V8DI 0 "register_operand") |
9792 | (mult:V8DI | |
9793 | (zero_extend:V8DI | |
9794 | (vec_select:V8SI | |
9795 | (match_operand:V16SI 1 "nonimmediate_operand") | |
9796 | (parallel [(const_int 0) (const_int 2) | |
9797 | (const_int 4) (const_int 6) | |
9798 | (const_int 8) (const_int 10) | |
9799 | (const_int 12) (const_int 14)]))) | |
9800 | (zero_extend:V8DI | |
9801 | (vec_select:V8SI | |
9802 | (match_operand:V16SI 2 "nonimmediate_operand") | |
9803 | (parallel [(const_int 0) (const_int 2) | |
9804 | (const_int 4) (const_int 6) | |
9805 | (const_int 8) (const_int 10) | |
9806 | (const_int 12) (const_int 14)])))))] | |
9807 | "TARGET_AVX512F" | |
9808 | "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") | |
9809 | ||
;; Insn for the unsigned even-element widening multiply V16SI -> V8DI,
;; emitted as vpmuludq with an optional mask decoration.
;; Requires TARGET_AVX512F; encoded with the EVEX prefix.
5220cab6 | 9810 | (define_insn "*vec_widen_umult_even_v16si<mask_name>" |
697a43f8 | 9811 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
9812 | (mult:V8DI | |
9813 | (zero_extend:V8DI | |
9814 | (vec_select:V8SI | |
9815 | (match_operand:V16SI 1 "nonimmediate_operand" "%v") | |
9816 | (parallel [(const_int 0) (const_int 2) | |
9817 | (const_int 4) (const_int 6) | |
9818 | (const_int 8) (const_int 10) | |
9819 | (const_int 12) (const_int 14)]))) | |
9820 | (zero_extend:V8DI | |
9821 | (vec_select:V8SI | |
9822 | (match_operand:V16SI 2 "nonimmediate_operand" "vm") | |
9823 | (parallel [(const_int 0) (const_int 2) | |
9824 | (const_int 4) (const_int 6) | |
9825 | (const_int 8) (const_int 10) | |
9826 | (const_int 12) (const_int 14)])))))] | |
9827 | "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)" | |
5220cab6 | 9828 | "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 9829 | [(set_attr "isa" "avx512f") |
9830 | (set_attr "type" "sseimul") | |
9831 | (set_attr "prefix_extra" "1") | |
9832 | (set_attr "prefix" "evex") | |
9833 | (set_attr "mode" "XI")]) | |
9834 | ||
;; Expander for the unsigned widening multiply of the even-indexed SImode
;; elements (0, 2, 4, 6) of two V8SI operands into a V4DI result.
;; Enabled under TARGET_AVX2 plus <mask_avx512vl_condition>.
4c79b3a9 | 9835 | (define_expand "vec_widen_umult_even_v8si<mask_name>" |
abd4f58b | 9836 | [(set (match_operand:V4DI 0 "register_operand") |
5deb404d | 9837 | (mult:V4DI |
9838 | (zero_extend:V4DI | |
9839 | (vec_select:V4SI | |
abd4f58b | 9840 | (match_operand:V8SI 1 "nonimmediate_operand") |
5deb404d | 9841 | (parallel [(const_int 0) (const_int 2) |
9842 | (const_int 4) (const_int 6)]))) | |
9843 | (zero_extend:V4DI | |
9844 | (vec_select:V4SI | |
abd4f58b | 9845 | (match_operand:V8SI 2 "nonimmediate_operand") |
5deb404d | 9846 | (parallel [(const_int 0) (const_int 2) |
9847 | (const_int 4) (const_int 6)])))))] | |
4c79b3a9 | 9848 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
5deb404d | 9849 | "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);") |
9850 | ||
;; Insn for the unsigned even-element widening multiply V8SI -> V4DI,
;; emitted as vpmuludq with an optional mask decoration.
;; Gated on TARGET_AVX2 plus <mask_avx512vl_condition>.
4c79b3a9 | 9851 | (define_insn "*vec_widen_umult_even_v8si<mask_name>" |
9852 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
5deb404d | 9853 | (mult:V4DI |
9854 | (zero_extend:V4DI | |
9855 | (vec_select:V4SI | |
4c79b3a9 | 9856 | (match_operand:V8SI 1 "nonimmediate_operand" "%v") |
5deb404d | 9857 | (parallel [(const_int 0) (const_int 2) |
9858 | (const_int 4) (const_int 6)]))) | |
9859 | (zero_extend:V4DI | |
9860 | (vec_select:V4SI | |
4c79b3a9 | 9861 | (match_operand:V8SI 2 "nonimmediate_operand" "vm") |
5deb404d | 9862 | (parallel [(const_int 0) (const_int 2) |
9863 | (const_int 4) (const_int 6)])))))] | |
4c79b3a9 | 9864 | "TARGET_AVX2 && <mask_avx512vl_condition> |
9865 | && ix86_binary_operator_ok (MULT, V8SImode, operands)" | |
9866 | "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 9867 | [(set_attr "type" "sseimul") |
4c79b3a9 | 9868 | (set_attr "prefix" "maybe_evex") |
5deb404d | 9869 | (set_attr "mode" "OI")]) |
5802c0cb | 9870 | |
;; Expander for the unsigned widening multiply of SImode elements 0 and 2
;; of two V4SI operands into a V2DI result.
;; Enabled under TARGET_SSE2 plus <mask_avx512vl_condition>.
4c79b3a9 | 9871 | (define_expand "vec_widen_umult_even_v4si<mask_name>" |
abd4f58b | 9872 | [(set (match_operand:V2DI 0 "register_operand") |
7c839b3f | 9873 | (mult:V2DI |
9874 | (zero_extend:V2DI | |
9875 | (vec_select:V2SI | |
abd4f58b | 9876 | (match_operand:V4SI 1 "nonimmediate_operand") |
7c839b3f | 9877 | (parallel [(const_int 0) (const_int 2)]))) |
9878 | (zero_extend:V2DI | |
9879 | (vec_select:V2SI | |
abd4f58b | 9880 | (match_operand:V4SI 2 "nonimmediate_operand") |
7c839b3f | 9881 | (parallel [(const_int 0) (const_int 2)])))))] |
4c79b3a9 | 9882 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
7c839b3f | 9883 | "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);") |
9884 | ||
;; Insn for the unsigned even-element widening multiply V4SI -> V2DI.
;; Alternative 0 emits the two-operand pmuludq, alternative 1 the
;; three-operand vpmuludq with an optional mask decoration.
4c79b3a9 | 9885 | (define_insn "*vec_widen_umult_even_v4si<mask_name>" |
9886 | [(set (match_operand:V2DI 0 "register_operand" "=x,v") | |
5802c0cb | 9887 | (mult:V2DI |
9888 | (zero_extend:V2DI | |
9889 | (vec_select:V2SI | |
4c79b3a9 | 9890 | (match_operand:V4SI 1 "nonimmediate_operand" "%0,v") |
5802c0cb | 9891 | (parallel [(const_int 0) (const_int 2)]))) |
9892 | (zero_extend:V2DI | |
9893 | (vec_select:V2SI | |
4c79b3a9 | 9894 | (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm") |
5802c0cb | 9895 | (parallel [(const_int 0) (const_int 2)])))))] |
4c79b3a9 | 9896 | "TARGET_SSE2 && <mask_avx512vl_condition> |
9897 | && ix86_binary_operator_ok (MULT, V4SImode, operands)" | |
d8f82f6b | 9898 | "@ |
9899 | pmuludq\t{%2, %0|%0, %2} | |
4c79b3a9 | 9900 | vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
d8f82f6b | 9901 | [(set_attr "isa" "noavx,avx") |
9902 | (set_attr "type" "sseimul") | |
9903 | (set_attr "prefix_data16" "1,*") | |
4c79b3a9 | 9904 | (set_attr "prefix" "orig,maybe_evex") |
5802c0cb | 9905 | (set_attr "mode" "TI")]) |
9906 | ||
;; Expander for the signed widening multiply of the even-indexed SImode
;; elements (0, 2, ..., 14) of two V16SI operands into a V8DI result.
;; Enabled under TARGET_AVX512F; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy.
5220cab6 | 9907 | (define_expand "vec_widen_smult_even_v16si<mask_name>" |
697a43f8 | 9908 | [(set (match_operand:V8DI 0 "register_operand") |
9909 | (mult:V8DI | |
9910 | (sign_extend:V8DI | |
9911 | (vec_select:V8SI | |
9912 | (match_operand:V16SI 1 "nonimmediate_operand") | |
9913 | (parallel [(const_int 0) (const_int 2) | |
9914 | (const_int 4) (const_int 6) | |
9915 | (const_int 8) (const_int 10) | |
9916 | (const_int 12) (const_int 14)]))) | |
9917 | (sign_extend:V8DI | |
9918 | (vec_select:V8SI | |
9919 | (match_operand:V16SI 2 "nonimmediate_operand") | |
9920 | (parallel [(const_int 0) (const_int 2) | |
9921 | (const_int 4) (const_int 6) | |
9922 | (const_int 8) (const_int 10) | |
9923 | (const_int 12) (const_int 14)])))))] | |
9924 | "TARGET_AVX512F" | |
9925 | "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") | |
9926 | ||
;; Insn for the signed even-element widening multiply V16SI -> V8DI,
;; emitted as vpmuldq with an optional mask decoration.
;; Requires TARGET_AVX512F; encoded with the EVEX prefix.
5220cab6 | 9927 | (define_insn "*vec_widen_smult_even_v16si<mask_name>" |
697a43f8 | 9928 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
9929 | (mult:V8DI | |
9930 | (sign_extend:V8DI | |
9931 | (vec_select:V8SI | |
9932 | (match_operand:V16SI 1 "nonimmediate_operand" "%v") | |
9933 | (parallel [(const_int 0) (const_int 2) | |
9934 | (const_int 4) (const_int 6) | |
9935 | (const_int 8) (const_int 10) | |
9936 | (const_int 12) (const_int 14)]))) | |
9937 | (sign_extend:V8DI | |
9938 | (vec_select:V8SI | |
9939 | (match_operand:V16SI 2 "nonimmediate_operand" "vm") | |
9940 | (parallel [(const_int 0) (const_int 2) | |
9941 | (const_int 4) (const_int 6) | |
9942 | (const_int 8) (const_int 10) | |
9943 | (const_int 12) (const_int 14)])))))] | |
9944 | "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)" | |
5220cab6 | 9945 | "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 9946 | [(set_attr "isa" "avx512f") |
9947 | (set_attr "type" "sseimul") | |
9948 | (set_attr "prefix_extra" "1") | |
9949 | (set_attr "prefix" "evex") | |
9950 | (set_attr "mode" "XI")]) | |
9951 | ||
;; Expander for the signed widening multiply of the even-indexed SImode
;; elements (0, 2, 4, 6) of two V8SI operands into a V4DI result.
;; Enabled under TARGET_AVX2 plus <mask_avx512vl_condition>.
4c79b3a9 | 9952 | (define_expand "vec_widen_smult_even_v8si<mask_name>" |
abd4f58b | 9953 | [(set (match_operand:V4DI 0 "register_operand") |
5deb404d | 9954 | (mult:V4DI |
9955 | (sign_extend:V4DI | |
9956 | (vec_select:V4SI | |
abd4f58b | 9957 | (match_operand:V8SI 1 "nonimmediate_operand") |
5deb404d | 9958 | (parallel [(const_int 0) (const_int 2) |
9959 | (const_int 4) (const_int 6)]))) | |
9960 | (sign_extend:V4DI | |
9961 | (vec_select:V4SI | |
abd4f58b | 9962 | (match_operand:V8SI 2 "nonimmediate_operand") |
5deb404d | 9963 | (parallel [(const_int 0) (const_int 2) |
9964 | (const_int 4) (const_int 6)])))))] | |
4c79b3a9 | 9965 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
5deb404d | 9966 | "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);") |
9967 | ||
;; Insn for the signed even-element widening multiply V8SI -> V4DI,
;; emitted as vpmuldq with an optional mask decoration.
;; The condition includes <mask_avx512vl_condition>, matching the expander
;; of the same name and the unsigned v8si pattern, so that the masked
;; 256-bit form is subject to the same gate.  The "prefix" attribute is
;; maybe_evex, as on the unsigned v8si pattern, because the "v"
;; constraints and the mask decoration permit an EVEX encoding.
4c79b3a9 | 9968 | (define_insn "*vec_widen_smult_even_v8si<mask_name>" |
9969 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
5deb404d | 9970 | (mult:V4DI |
9971 | (sign_extend:V4DI | |
9972 | (vec_select:V4SI | |
0a281fd0 | 9973 | (match_operand:V8SI 1 "nonimmediate_operand" "%v") |
5deb404d | 9974 | (parallel [(const_int 0) (const_int 2) |
9975 | (const_int 4) (const_int 6)]))) | |
9976 | (sign_extend:V4DI | |
9977 | (vec_select:V4SI | |
4c79b3a9 | 9978 | (match_operand:V8SI 2 "nonimmediate_operand" "vm") |
5deb404d | 9979 | (parallel [(const_int 0) (const_int 2) |
9980 | (const_int 4) (const_int 6)])))))] | |
4c79b3a9 | 9981 | "TARGET_AVX2 && <mask_avx512vl_condition> |
9982 | && ix86_binary_operator_ok (MULT, V8SImode, operands)" | |
9983 | "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
66e3f7be | 9984 | [(set_attr "type" "sseimul") |
5deb404d | 9985 | (set_attr "prefix_extra" "1") |
9986 | (set_attr "prefix" "maybe_evex") | |
9987 | (set_attr "mode" "OI")]) | |
9988 | ||
;; Expander for the signed widening multiply of SImode elements 0 and 2
;; of two V4SI operands into a V2DI result.
;; Enabled under TARGET_SSE4_1 plus <mask_avx512vl_condition>.
4c79b3a9 | 9989 | (define_expand "sse4_1_mulv2siv2di3<mask_name>" |
abd4f58b | 9990 | [(set (match_operand:V2DI 0 "register_operand") |
7c839b3f | 9991 | (mult:V2DI |
9992 | (sign_extend:V2DI | |
9993 | (vec_select:V2SI | |
abd4f58b | 9994 | (match_operand:V4SI 1 "nonimmediate_operand") |
7c839b3f | 9995 | (parallel [(const_int 0) (const_int 2)]))) |
9996 | (sign_extend:V2DI | |
9997 | (vec_select:V2SI | |
abd4f58b | 9998 | (match_operand:V4SI 2 "nonimmediate_operand") |
7c839b3f | 9999 | (parallel [(const_int 0) (const_int 2)])))))] |
4c79b3a9 | 10000 | "TARGET_SSE4_1 && <mask_avx512vl_condition>" |
7c839b3f | 10001 | "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);") |
ed30e0a6 | 10002 | |
;; Insn for the signed even-element widening multiply V4SI -> V2DI.
;; Alternatives 0 and 1 emit the two-operand pmuldq (differing only in
;; register-class constraints), alternative 2 the three-operand vpmuldq
;; with an optional mask decoration.
;; NOTE(review): the third alternative's prefix is "vex" while the unsigned
;; v4si pattern uses "maybe_evex" for its masked alternative -- confirm.
4c79b3a9 | 10003 | (define_insn "*sse4_1_mulv2siv2di3<mask_name>" |
0a32b282 | 10004 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v") |
ed30e0a6 | 10005 | (mult:V2DI |
10006 | (sign_extend:V2DI | |
10007 | (vec_select:V2SI | |
0a32b282 | 10008 | (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v") |
ed30e0a6 | 10009 | (parallel [(const_int 0) (const_int 2)]))) |
10010 | (sign_extend:V2DI | |
10011 | (vec_select:V2SI | |
0a32b282 | 10012 | (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm") |
ed30e0a6 | 10013 | (parallel [(const_int 0) (const_int 2)])))))] |
4c79b3a9 | 10014 | "TARGET_SSE4_1 && <mask_avx512vl_condition> |
10015 | && ix86_binary_operator_ok (MULT, V4SImode, operands)" | |
d8f82f6b | 10016 | "@ |
0a32b282 | 10017 | pmuldq\t{%2, %0|%0, %2} |
d8f82f6b | 10018 | pmuldq\t{%2, %0|%0, %2} |
4c79b3a9 | 10019 | vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
0a32b282 | 10020 | [(set_attr "isa" "noavx,noavx,avx") |
d8f82f6b | 10021 | (set_attr "type" "sseimul") |
0a32b282 | 10022 | (set_attr "prefix_data16" "1,1,*") |
ed30e0a6 | 10023 | (set_attr "prefix_extra" "1") |
0a32b282 | 10024 | (set_attr "prefix" "orig,orig,vex") |
ed30e0a6 | 10025 | (set_attr "mode" "TI")]) |
10026 | ||
;; vpmaddwd expressed as an UNSPEC_PMADDWD512 of two VI2_AVX2 operands,
;; producing a result in <sseunpackmode>, with an optional mask decoration.
;; Requires TARGET_AVX512BW plus <mask_mode512bit_condition>.
2d71b728 | 10027 | (define_insn "avx512bw_pmaddwd512<mode><mask_name>" |
10028 | [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v") | |
10029 | (unspec:<sseunpackmode> | |
10030 | [(match_operand:VI2_AVX2 1 "register_operand" "v") | |
10031 | (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")] | |
10032 | UNSPEC_PMADDWD512))] | |
10033 | "TARGET_AVX512BW && <mask_mode512bit_condition>" | |
10034 | "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
10035 | [(set_attr "type" "sseiadd") | |
10036 | (set_attr "prefix" "evex") | |
10037 | (set_attr "mode" "XI")]) | |
10038 | ||
;; Expander for the 256-bit multiply-add of packed words: the sign-extended
;; even-indexed HImode elements of operands 1 and 2 are multiplied, the
;; odd-indexed ones likewise, and the two V8SI products are added.
;; Enabled under TARGET_AVX2.
5deb404d | 10039 | (define_expand "avx2_pmaddwd" |
abd4f58b | 10040 | [(set (match_operand:V8SI 0 "register_operand") |
5deb404d | 10041 | (plus:V8SI |
10042 | (mult:V8SI | |
10043 | (sign_extend:V8SI | |
10044 | (vec_select:V8HI | |
abd4f58b | 10045 | (match_operand:V16HI 1 "nonimmediate_operand") |
04d95c72 | 10046 | (parallel [(const_int 0) (const_int 2) |
10047 | (const_int 4) (const_int 6) | |
10048 | (const_int 8) (const_int 10) | |
10049 | (const_int 12) (const_int 14)]))) | |
5deb404d | 10050 | (sign_extend:V8SI |
10051 | (vec_select:V8HI | |
abd4f58b | 10052 | (match_operand:V16HI 2 "nonimmediate_operand") |
04d95c72 | 10053 | (parallel [(const_int 0) (const_int 2) |
10054 | (const_int 4) (const_int 6) | |
10055 | (const_int 8) (const_int 10) | |
10056 | (const_int 12) (const_int 14)])))) | |
5deb404d | 10057 | (mult:V8SI |
10058 | (sign_extend:V8SI | |
10059 | (vec_select:V8HI (match_dup 1) | |
04d95c72 | 10060 | (parallel [(const_int 1) (const_int 3) |
10061 | (const_int 5) (const_int 7) | |
10062 | (const_int 9) (const_int 11) | |
10063 | (const_int 13) (const_int 15)]))) | |
5deb404d | 10064 | (sign_extend:V8SI |
10065 | (vec_select:V8HI (match_dup 2) | |
04d95c72 | 10066 | (parallel [(const_int 1) (const_int 3) |
10067 | (const_int 5) (const_int 7) | |
10068 | (const_int 9) (const_int 11) | |
10069 | (const_int 13) (const_int 15)]))))))] | |
5deb404d | 10070 | "TARGET_AVX2" |
10071 | "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") | |
10072 | ||
;; Insn matching the avx2_pmaddwd RTL shape (sum of the even-element and
;; odd-element sign-extended V16HI products), emitted as vpmaddwd.
;; Requires TARGET_AVX2; VEX-encoded.
5deb404d | 10073 | (define_insn "*avx2_pmaddwd" |
10074 | [(set (match_operand:V8SI 0 "register_operand" "=x") | |
10075 | (plus:V8SI | |
10076 | (mult:V8SI | |
10077 | (sign_extend:V8SI | |
10078 | (vec_select:V8HI | |
10079 | (match_operand:V16HI 1 "nonimmediate_operand" "%x") | |
04d95c72 | 10080 | (parallel [(const_int 0) (const_int 2) |
10081 | (const_int 4) (const_int 6) | |
10082 | (const_int 8) (const_int 10) | |
10083 | (const_int 12) (const_int 14)]))) | |
5deb404d | 10084 | (sign_extend:V8SI |
10085 | (vec_select:V8HI | |
10086 | (match_operand:V16HI 2 "nonimmediate_operand" "xm") | |
04d95c72 | 10087 | (parallel [(const_int 0) (const_int 2) |
10088 | (const_int 4) (const_int 6) | |
10089 | (const_int 8) (const_int 10) | |
10090 | (const_int 12) (const_int 14)])))) | |
5deb404d | 10091 | (mult:V8SI |
10092 | (sign_extend:V8SI | |
10093 | (vec_select:V8HI (match_dup 1) | |
04d95c72 | 10094 | (parallel [(const_int 1) (const_int 3) |
10095 | (const_int 5) (const_int 7) | |
10096 | (const_int 9) (const_int 11) | |
10097 | (const_int 13) (const_int 15)]))) | |
5deb404d | 10098 | (sign_extend:V8SI |
10099 | (vec_select:V8HI (match_dup 2) | |
04d95c72 | 10100 | (parallel [(const_int 1) (const_int 3) |
10101 | (const_int 5) (const_int 7) | |
10102 | (const_int 9) (const_int 11) | |
10103 | (const_int 13) (const_int 15)]))))))] | |
5deb404d | 10104 | "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" |
10105 | "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" | |
10106 | [(set_attr "type" "sseiadd") | |
10107 | (set_attr "prefix" "vex") | |
10108 | (set_attr "mode" "OI")]) | |
10109 | ||
;; Expander for the 128-bit multiply-add of packed words: the sign-extended
;; even-indexed HImode elements of operands 1 and 2 are multiplied, the
;; odd-indexed ones likewise, and the two V4SI products are added.
;; Enabled under TARGET_SSE2.
04d95c72 | 10110 | (define_expand "sse2_pmaddwd" |
10111 | [(set (match_operand:V4SI 0 "register_operand") | |
10112 | (plus:V4SI | |
10113 | (mult:V4SI | |
10114 | (sign_extend:V4SI | |
10115 | (vec_select:V4HI | |
10116 | (match_operand:V8HI 1 "nonimmediate_operand") | |
10117 | (parallel [(const_int 0) (const_int 2) | |
10118 | (const_int 4) (const_int 6)]))) | |
10119 | (sign_extend:V4SI | |
10120 | (vec_select:V4HI | |
10121 | (match_operand:V8HI 2 "nonimmediate_operand") | |
10122 | (parallel [(const_int 0) (const_int 2) | |
10123 | (const_int 4) (const_int 6)])))) | |
10124 | (mult:V4SI | |
10125 | (sign_extend:V4SI | |
10126 | (vec_select:V4HI (match_dup 1) | |
10127 | (parallel [(const_int 1) (const_int 3) | |
10128 | (const_int 5) (const_int 7)]))) | |
10129 | (sign_extend:V4SI | |
10130 | (vec_select:V4HI (match_dup 2) | |
10131 | (parallel [(const_int 1) (const_int 3) | |
10132 | (const_int 5) (const_int 7)]))))))] | |
10133 | "TARGET_SSE2" | |
10134 | "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") | |
10135 | ||
;; Insn matching the sse2_pmaddwd RTL shape.  Alternative 0 emits the
;; two-operand pmaddwd (destination tied to operand 1), alternative 1 the
;; three-operand vpmaddwd.  Requires TARGET_SSE2.
7c839b3f | 10136 | (define_insn "*sse2_pmaddwd" |
d8f82f6b | 10137 | [(set (match_operand:V4SI 0 "register_operand" "=x,x") |
5802c0cb | 10138 | (plus:V4SI |
10139 | (mult:V4SI | |
10140 | (sign_extend:V4SI | |
10141 | (vec_select:V4HI | |
d8f82f6b | 10142 | (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") |
04d95c72 | 10143 | (parallel [(const_int 0) (const_int 2) |
10144 | (const_int 4) (const_int 6)]))) | |
5802c0cb | 10145 | (sign_extend:V4SI |
10146 | (vec_select:V4HI | |
d8f82f6b | 10147 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") |
04d95c72 | 10148 | (parallel [(const_int 0) (const_int 2) |
10149 | (const_int 4) (const_int 6)])))) | |
5802c0cb | 10150 | (mult:V4SI |
10151 | (sign_extend:V4SI | |
10152 | (vec_select:V4HI (match_dup 1) | |
04d95c72 | 10153 | (parallel [(const_int 1) (const_int 3) |
10154 | (const_int 5) (const_int 7)]))) | |
5802c0cb | 10155 | (sign_extend:V4SI |
10156 | (vec_select:V4HI (match_dup 2) | |
04d95c72 | 10157 | (parallel [(const_int 1) (const_int 3) |
10158 | (const_int 5) (const_int 7)]))))))] | |
70169283 | 10159 | "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" |
d8f82f6b | 10160 | "@ |
10161 | pmaddwd\t{%2, %0|%0, %2} | |
10162 | vpmaddwd\t{%2, %1, %0|%0, %1, %2}" | |
10163 | [(set_attr "isa" "noavx,avx") | |
10164 | (set_attr "type" "sseiadd") | |
fbfe006e | 10165 | (set_attr "atom_unit" "simul") |
d8f82f6b | 10166 | (set_attr "prefix_data16" "1,*") |
10167 | (set_attr "prefix" "orig,vex") | |
5802c0cb | 10168 | (set_attr "mode" "TI")]) |
10169 | ||
;; Multiplication insn over the VI8 modes, emitted as vpmullq with an
;; optional mask decoration.  Operand 1 must be a register; operand 2 may
;; be memory.  Requires TARGET_AVX512DQ plus <mask_mode512bit_condition>.
4c79b3a9 | 10170 | (define_insn "avx512dq_mul<mode>3<mask_name>" |
10171 | [(set (match_operand:VI8 0 "register_operand" "=v") | |
10172 | (mult:VI8 | |
10173 | (match_operand:VI8 1 "register_operand" "v") | |
10174 | (match_operand:VI8 2 "nonimmediate_operand" "vm")))] | |
10175 | "TARGET_AVX512DQ && <mask_mode512bit_condition>" | |
10176 | "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
10177 | [(set_attr "type" "sseimul") | |
10178 | (set_attr "prefix" "evex") | |
10179 | (set_attr "mode" "<sseinsnmode>")]) | |
10180 | ||
;; Multiplication expander for the VI4_AVX512F modes.
;; With TARGET_SSE4_1, any operand that is not a nonimmediate_operand is
;; forced into a register, the operands are passed to
;; ix86_fixup_binary_operands_no_copy, and the RTL template is emitted.
;; Otherwise ix86_expand_sse2_mulv4si3 generates the sequence and the
;; expander finishes with DONE.
5220cab6 | 10181 | (define_expand "mul<mode>3<mask_name>" |
c6cff444 | 10182 | [(set (match_operand:VI4_AVX512F 0 "register_operand") |
10183 | (mult:VI4_AVX512F | |
10184 | (match_operand:VI4_AVX512F 1 "general_vector_operand") | |
10185 | (match_operand:VI4_AVX512F 2 "general_vector_operand")))] | |
5220cab6 | 10186 | "TARGET_SSE2 && <mask_mode512bit_condition>" |
c195473e | 10187 | { |
3737d3e4 | 10188 | if (TARGET_SSE4_1) |
087cf0d3 | 10189 | { |
ebdfd365 | 10190 | if (!nonimmediate_operand (operands[1], <MODE>mode)) |
10191 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
10192 | if (!nonimmediate_operand (operands[2], <MODE>mode)) | |
10193 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
087cf0d3 | 10194 | ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); |
10195 | } | |
10196 | else | |
10197 | { | |
10198 | ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]); | |
10199 | DONE; | |
10200 | } | |
c195473e | 10201 | }) |
10202 | ||
;; Multiplication insn for the VI4_AVX512F modes.  Alternatives 0 and 1
;; emit the two-operand pmulld (differing only in register-class
;; constraints), alternative 2 the three-operand vpmulld with an optional
;; mask decoration.  Requires TARGET_SSE4_1.
5220cab6 | 10203 | (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>" |
0a32b282 | 10204 | [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v") |
c6cff444 | 10205 | (mult:VI4_AVX512F |
0a32b282 | 10206 | (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v") |
10207 | (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))] | |
5220cab6 | 10208 | "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>" |
d8f82f6b | 10209 | "@ |
0a32b282 | 10210 | pmulld\t{%2, %0|%0, %2} |
d8f82f6b | 10211 | pmulld\t{%2, %0|%0, %2} |
5220cab6 | 10212 | vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
0a32b282 | 10213 | [(set_attr "isa" "noavx,noavx,avx") |
d8f82f6b | 10214 | (set_attr "type" "sseimul") |
2d771892 | 10215 | (set_attr "prefix_extra" "1") |
0a32b282 | 10216 | (set_attr "prefix" "<mask_prefix4>") |
10217 | (set_attr "btver2_decode" "vector,vector,vector") | |
5deb404d | 10218 | (set_attr "mode" "<sseinsnmode>")]) |
2d771892 | 10219 | |
;; Multiplication expander for the VI8_AVX2_AVX512F modes.  The RTL
;; template is never emitted: ix86_expand_sse2_mulvxdi3 generates the
;; sequence and the expander finishes with DONE.
04c4f045 | 10220 | (define_expand "mul<mode>3" |
c6cff444 | 10221 | [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand") |
10222 | (mult:VI8_AVX2_AVX512F | |
10223 | (match_operand:VI8_AVX2_AVX512F 1 "register_operand") | |
10224 | (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))] | |
04c4f045 | 10225 | "TARGET_SSE2" |
4b26818b | 10226 | { |
04c4f045 | 10227 | ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]); |
4b26818b | 10228 | DONE; |
10229 | }) | |
10230 | ||
;; Widening multiply expander (hi variant) for the VI124_AVX2 modes.
;; Delegates to ix86_expand_mul_widen_hilo, passing <u_bool> and true as
;; the last two arguments, then finishes with DONE.
16e84e52 | 10231 | (define_expand "vec_widen_<s>mult_hi_<mode>" |
abd4f58b | 10232 | [(match_operand:<sseunpackmode> 0 "register_operand") |
16e84e52 | 10233 | (any_extend:<sseunpackmode> |
3737d3e4 | 10234 | (match_operand:VI124_AVX2 1 "register_operand")) |
10235 | (match_operand:VI124_AVX2 2 "register_operand")] | |
5cd92c37 | 10236 | "TARGET_SSE2" |
1f428eb0 | 10237 | { |
3737d3e4 | 10238 | ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2], |
10239 | <u_bool>, true); | |
1f428eb0 | 10240 | DONE; |
10241 | }) | |
10242 | ||
;; Widening multiply expander (lo variant) for the VI124_AVX2 modes.
;; Delegates to ix86_expand_mul_widen_hilo, passing <u_bool> and false as
;; the last two arguments, then finishes with DONE.
16e84e52 | 10243 | (define_expand "vec_widen_<s>mult_lo_<mode>" |
abd4f58b | 10244 | [(match_operand:<sseunpackmode> 0 "register_operand") |
16e84e52 | 10245 | (any_extend:<sseunpackmode> |
3737d3e4 | 10246 | (match_operand:VI124_AVX2 1 "register_operand")) |
10247 | (match_operand:VI124_AVX2 2 "register_operand")] | |
5cd92c37 | 10248 | "TARGET_SSE2" |
c6c91d61 | 10249 | { |
3737d3e4 | 10250 | ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2], |
10251 | <u_bool>, false); | |
c6c91d61 | 10252 | DONE; |
10253 | }) | |
10254 | ||
5cd92c37 | 10255 | ;; Most widen_<s>mult_even_<mode> can be handled directly from other |
10256 | ;; named patterns, but signed V4SI needs special help for plain SSE2. | |
;; Unmasked expander for V4SI -> V2DI, available under TARGET_SSE2.
;; Delegates to ix86_expand_mul_widen_evenodd with false, false as the
;; last two arguments, then finishes with DONE.
10257 | (define_expand "vec_widen_smult_even_v4si" | |
10258 | [(match_operand:V2DI 0 "register_operand") | |
ebdfd365 | 10259 | (match_operand:V4SI 1 "nonimmediate_operand") |
10260 | (match_operand:V4SI 2 "nonimmediate_operand")] | |
5cd92c37 | 10261 | "TARGET_SSE2" |
10262 | { | |
10263 | ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2], | |
10264 | false, false); | |
10265 | DONE; | |
10266 | }) | |
10267 | ||
;; Widening multiply expander (odd variant) for the VI4_AVX512F modes.
;; Delegates to ix86_expand_mul_widen_evenodd, passing <u_bool> and true
;; as the last two arguments, then finishes with DONE.
de9b8545 | 10268 | (define_expand "vec_widen_<s>mult_odd_<mode>" |
10269 | [(match_operand:<sseunpackmode> 0 "register_operand") | |
10270 | (any_extend:<sseunpackmode> | |
c6cff444 | 10271 | (match_operand:VI4_AVX512F 1 "general_vector_operand")) |
10272 | (match_operand:VI4_AVX512F 2 "general_vector_operand")] | |
5cd92c37 | 10273 | "TARGET_SSE2" |
de9b8545 | 10274 | { |
10275 | ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2], | |
10276 | <u_bool>, true); | |
10277 | DONE; | |
10278 | }) | |
10279 | ||
;; Suffix appended to the pmaddwd generator name in sdot_prod<mode>:
;; "512v32hi" for V32HI, empty for V16HI and V8HI.
2d71b728 | 10280 | (define_mode_attr SDOT_PMADD_SUF |
10281 | [(V32HI "512v32hi") (V16HI "") (V8HI "")]) | |
10282 | ||
;; Dot-product expander for the VI2_AVX2 modes.  Emits a pmaddwd of
;; operands 1 and 2 into a fresh <sseunpackmode> register, then sets
;; operand 0 to the sum of operand 3 and that temporary.
16e84e52 | 10283 | (define_expand "sdot_prod<mode>" |
abd4f58b | 10284 | [(match_operand:<sseunpackmode> 0 "register_operand") |
10285 | (match_operand:VI2_AVX2 1 "register_operand") | |
10286 | (match_operand:VI2_AVX2 2 "register_operand") | |
10287 | (match_operand:<sseunpackmode> 3 "register_operand")] | |
4a61a337 | 10288 | "TARGET_SSE2" |
10289 | { | |
16e84e52 | 10290 | rtx t = gen_reg_rtx (<sseunpackmode>mode); |
2d71b728 | 10291 | emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2])); |
d1f9b275 | 10292 | emit_insn (gen_rtx_SET (operands[0], |
16e84e52 | 10293 | gen_rtx_PLUS (<sseunpackmode>mode, |
10294 | operands[3], t))); | |
4a61a337 | 10295 | DONE; |
10296 | }) | |
10297 | ||
48bb0beb | 10298 | ;; Normally we use widen_mul_even/odd, but combine can't quite get it all |
10299 | ;; back together when madd is available. | |
;; V4SI dot-product expander, enabled under TARGET_XOP.  Emits
;; xop_pmacsdqh accumulating onto operand 3 into a V2DI temporary, then
;; xop_pmacsdql accumulating onto that temporary into operand 0.
10300 | (define_expand "sdot_prodv4si" | |
abd4f58b | 10301 | [(match_operand:V2DI 0 "register_operand") |
48bb0beb | 10302 | (match_operand:V4SI 1 "register_operand") |
abd4f58b | 10303 | (match_operand:V4SI 2 "register_operand") |
10304 | (match_operand:V2DI 3 "register_operand")] | |
48bb0beb | 10305 | "TARGET_XOP" |
16e84e52 | 10306 | { |
48bb0beb | 10307 | rtx t = gen_reg_rtx (V2DImode); |
10308 | emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3])); | |
10309 | emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t)); | |
16e84e52 | 10310 | DONE; |
10311 | }) | |
10312 | ||
;; V16QI expander built on psadbw, enabled under TARGET_SSE2.  Emits
;; sse2_psadbw of operands 1 and 2 into a V2DI temporary, converts that to
;; a V4SI temporary with convert_move, and adds operand 3 via addv4si3
;; into operand 0.
a2287001 | 10313 | (define_expand "usadv16qi" |
10314 | [(match_operand:V4SI 0 "register_operand") | |
10315 | (match_operand:V16QI 1 "register_operand") | |
10316 | (match_operand:V16QI 2 "nonimmediate_operand") | |
10317 | (match_operand:V4SI 3 "nonimmediate_operand")] | |
10318 | "TARGET_SSE2" | |
10319 | { | |
10320 | rtx t1 = gen_reg_rtx (V2DImode); | |
10321 | rtx t2 = gen_reg_rtx (V4SImode); | |
10322 | emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2])); | |
10323 | convert_move (t2, t1, 0); | |
10324 | emit_insn (gen_addv4si3 (operands[0], t2, operands[3])); | |
10325 | DONE; | |
10326 | }) | |
10327 | ||
;; V32QI expander built on psadbw, enabled under TARGET_AVX2.  Emits
;; avx2_psadbw of operands 1 and 2 into a V4DI temporary, converts that to
;; a V8SI temporary with convert_move, and adds operand 3 via addv8si3
;; into operand 0.
10328 | (define_expand "usadv32qi" | |
10329 | [(match_operand:V8SI 0 "register_operand") | |
10330 | (match_operand:V32QI 1 "register_operand") | |
10331 | (match_operand:V32QI 2 "nonimmediate_operand") | |
10332 | (match_operand:V8SI 3 "nonimmediate_operand")] | |
10333 | "TARGET_AVX2" | |
10334 | { | |
10335 | rtx t1 = gen_reg_rtx (V4DImode); | |
10336 | rtx t2 = gen_reg_rtx (V8SImode); | |
10337 | emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2])); | |
10338 | convert_move (t2, t1, 0); | |
10339 | emit_insn (gen_addv8si3 (operands[0], t2, operands[3])); | |
10340 | DONE; | |
10341 | }) | |
10342 | ||
;; Arithmetic right shift of a VI24_AVX2 vector (operand 1) by the SImode
;; count in operand 2.  Alternative 0 ("noavx") ties the destination to
;; operand 1 and prints the two-operand psra form; alternative 1 ("avx")
;; prints the three-operand vpsra form.  length_immediate is 1 only when
;; operand 2 is a const_int.
5802c0cb | 10343 | (define_insn "ashr<mode>3" |
5deb404d | 10344 | [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") |
10345 | (ashiftrt:VI24_AVX2 | |
10346 | (match_operand:VI24_AVX2 1 "register_operand" "0,x") | |
d8f82f6b | 10347 | (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] |
5802c0cb | 10348 | "TARGET_SSE2" |
d8f82f6b | 10349 | "@ |
63d5e521 | 10350 | psra<ssemodesuffix>\t{%2, %0|%0, %2} |
10351 | vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
d8f82f6b | 10352 | [(set_attr "isa" "noavx,avx") |
10353 | (set_attr "type" "sseishft") | |
00a0e418 | 10354 | (set (attr "length_immediate") |
abd4f58b | 10355 | (if_then_else (match_operand 2 "const_int_operand") |
00a0e418 | 10356 | (const_string "1") |
10357 | (const_string "0"))) | |
d8f82f6b | 10358 | (set_attr "prefix_data16" "1,*") |
10359 | (set_attr "prefix" "orig,vex") | |
5deb404d | 10360 | (set_attr "mode" "<sseinsnmode>")]) |
10361 | ||
;; Arithmetic right shift for VI24_AVX512BW_1 modes, enabled by
;; TARGET_AVX512VL, with the <mask_name> subst applied to the pattern name.
;; Alternative 0 takes a register source and a count in a "v" register;
;; alternative 1 also allows a memory source but requires an "N" constant
;; count.  Both alternatives print the same vpsra template.
4f545baf | 10362 | (define_insn "<mask_codefor>ashr<mode>3<mask_name>" |
10363 | [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v") | |
10364 | (ashiftrt:VI24_AVX512BW_1 | |
10365 | (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm") | |
10366 | (match_operand:SI 2 "nonmemory_operand" "v,N")))] | |
10367 | "TARGET_AVX512VL" | |
10368 | "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
10369 | [(set_attr "type" "sseishft") | |
10370 | (set (attr "length_immediate") | |
10371 | (if_then_else (match_operand 2 "const_int_operand") | |
10372 | (const_string "1") | |
10373 | (const_string "0"))) | |
10374 | (set_attr "mode" "<sseinsnmode>")]) | |
10375 | ||
;; Arithmetic right shift of a V2DI vector, enabled by TARGET_AVX512VL and
;; printed as vpsraq.  Unlike the neighbouring shift patterns the count
;; (operand 2) is DImode here.  Alternative 0 takes a register source and a
;; "v" register count; alternative 1 also allows a memory source but
;; requires an "N" constant count.
10376 | (define_insn "<mask_codefor>ashrv2di3<mask_name>" | |
10377 | [(set (match_operand:V2DI 0 "register_operand" "=v,v") | |
10378 | (ashiftrt:V2DI | |
10379 | (match_operand:V2DI 1 "nonimmediate_operand" "v,vm") | |
10380 | (match_operand:DI 2 "nonmemory_operand" "v,N")))] | |
10381 | "TARGET_AVX512VL" | |
10382 | "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
10383 | [(set_attr "type" "sseishft") | |
10384 | (set (attr "length_immediate") | |
10385 | (if_then_else (match_operand 2 "const_int_operand") | |
10386 | (const_string "1") | |
10387 | (const_string "0"))) | |
10388 | (set_attr "mode" "TI")]) | |
10389 | ||
;; Arithmetic right shift for VI248_AVX512BW_AVX512VL modes, enabled by
;; TARGET_AVX512F (any further per-mode gating would come from the mode
;; iterator, which is defined outside this view).  Alternative 0 takes a
;; register source and a "v" register count; alternative 1 also allows a
;; memory source but requires an "N" constant count.
5220cab6 | 10390 | (define_insn "ashr<mode>3<mask_name>" |
4f545baf | 10391 | [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v") |
10392 | (ashiftrt:VI248_AVX512BW_AVX512VL | |
10393 | (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm") | |
5220cab6 | 10394 | (match_operand:SI 2 "nonmemory_operand" "v,N")))] |
4f545baf | 10395 | "TARGET_AVX512F" |
5220cab6 | 10396 | "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
10397 | [(set_attr "type" "sseishft") | |
10398 | (set (attr "length_immediate") | |
10399 | (if_then_else (match_operand 2 "const_int_operand") | |
10400 | (const_string "1") | |
10401 | (const_string "0"))) | |
10402 | (set_attr "mode" "<sseinsnmode>")]) | |
10403 | ||
;; Logical shifts (any_lshift code iterator) of VI2_AVX2_AVX512BW vectors by
;; the SImode count in operand 2.  The condition combines TARGET_SSE2 with
;; the <mask_mode512bit_condition> and <mask_avx512bw_condition> subst
;; attributes.  Alternative 0 ("noavx") ties the destination to operand 1;
;; alternative 1 ("avx") is the three-operand form and carries the optional
;; <mask_operand3> suffix.
4055e076 | 10404 | (define_insn "<shift_insn><mode>3<mask_name>" |
10405 | [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v") | |
10406 | (any_lshift:VI2_AVX2_AVX512BW | |
10407 | (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v") | |
10408 | (match_operand:SI 2 "nonmemory_operand" "xN,vN")))] | |
10409 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
10410 | "@ | |
10411 | p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} | |
10412 | vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
10413 | [(set_attr "isa" "noavx,avx") | |
10414 | (set_attr "type" "sseishft") | |
10415 | (set (attr "length_immediate") | |
10416 | (if_then_else (match_operand 2 "const_int_operand") | |
10417 | (const_string "1") | |
10418 | (const_string "0"))) | |
10419 | (set_attr "prefix_data16" "1,*") | |
10420 | (set_attr "prefix" "orig,vex") | |
10421 | (set_attr "mode" "<sseinsnmode>")]) | |
10422 | ||
;; Logical shifts (any_lshift code iterator) of VI48_AVX2 vectors by the
;; SImode count in operand 2, under TARGET_SSE2 and
;; <mask_mode512bit_condition>.  Alternative 0 ("noavx") ties the
;; destination to operand 1; alternative 1 ("avx") is the three-operand form
;; and carries the optional <mask_operand3> suffix.
10423 | (define_insn "<shift_insn><mode>3<mask_name>" | |
10424 | [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v") | |
10425 | (any_lshift:VI48_AVX2 | |
10426 | (match_operand:VI48_AVX2 1 "register_operand" "0,v") | |
10427 | (match_operand:SI 2 "nonmemory_operand" "xN,vN")))] | |
10428 | "TARGET_SSE2 && <mask_mode512bit_condition>" | |
d8f82f6b | 10429 | "@ |
3297e0a4 | 10430 | p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} |
4055e076 | 10431 | vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
d8f82f6b | 10432 | [(set_attr "isa" "noavx,avx") |
10433 | (set_attr "type" "sseishft") | |
00a0e418 | 10434 | (set (attr "length_immediate") |
abd4f58b | 10435 | (if_then_else (match_operand 2 "const_int_operand") |
00a0e418 | 10436 | (const_string "1") |
10437 | (const_string "0"))) | |
d8f82f6b | 10438 | (set_attr "prefix_data16" "1,*") |
10439 | (set_attr "prefix" "orig,vex") | |
335adffd | 10440 | (set_attr "mode" "<sseinsnmode>")]) |
5802c0cb | 10441 | |
;; Logical shifts (any_lshift code iterator) of VI48_512 vectors, under
;; TARGET_AVX512F and <mask_mode512bit_condition>.  Alternative 0 takes a
;; register source with a register-or-"N"-constant count; alternative 1
;; takes a memory source and therefore only an "N" constant count.  Both
;; alternatives share one EVEX-prefixed template.
5220cab6 | 10442 | (define_insn "<shift_insn><mode>3<mask_name>" |
d2ff59d6 | 10443 | [(set (match_operand:VI48_512 0 "register_operand" "=v,v") |
10444 | (any_lshift:VI48_512 | |
23afdab7 | 10445 | (match_operand:VI48_512 1 "nonimmediate_operand" "v,m") |
d2ff59d6 | 10446 | (match_operand:SI 2 "nonmemory_operand" "vN,N")))] |
5220cab6 | 10447 | "TARGET_AVX512F && <mask_mode512bit_condition>" |
10448 | "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
d2ff59d6 | 10449 | [(set_attr "isa" "avx512f") |
10450 | (set_attr "type" "sseishft") | |
10451 | (set (attr "length_immediate") | |
10452 | (if_then_else (match_operand 2 "const_int_operand") | |
10453 | (const_string "1") | |
10454 | (const_string "0"))) | |
10455 | (set_attr "prefix" "evex") | |
10456 | (set_attr "mode" "<sseinsnmode>")]) | |
10457 | ||
5220cab6 | 10458 | |
;; Expander vec_shl_<mode> for VI_128 modes (TARGET_SSE2).  The preparation
;; code reinterprets operand 1 as V1TI with gen_lowpart, allocates a fresh
;; V1TI register as operand 3 to receive the V1TI left shift, and makes
;; operand 4 the <MODE>-mode lowpart of operand 3, which the second set
;; copies into operand 0.  The count must satisfy
;; const_0_to_255_mul_8_operand.
41b1d2cc | 10459 | (define_expand "vec_shl_<mode>" |
09e640e6 | 10460 | [(set (match_dup 3) |
5deb404d | 10461 | (ashift:V1TI |
abd4f58b | 10462 | (match_operand:VI_128 1 "register_operand") |
09e640e6 | 10463 | (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) |
10464 | (set (match_operand:VI_128 0 "register_operand") (match_dup 4))] | |
41b1d2cc | 10465 | "TARGET_SSE2" |
10466 | { | |
c98fd3f6 | 10467 | operands[1] = gen_lowpart (V1TImode, operands[1]); |
09e640e6 | 10468 | operands[3] = gen_reg_rtx (V1TImode); |
10469 | operands[4] = gen_lowpart (<MODE>mode, operands[3]); | |
41b1d2cc | 10470 | }) |
10471 | ||
;; Whole-register left shift for VIMAX_AVX2 modes (TARGET_SSE2).  The count
;; in operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the immediate.  Alternative 0
;; ("noavx") prints the two-operand pslldq with the destination tied to
;; operand 1; alternative 1 ("avx") prints the three-operand vpslldq.
5deb404d | 10472 | (define_insn "<sse2_avx2>_ashl<mode>3" |
fd6b07be | 10473 | [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") |
5deb404d | 10474 | (ashift:VIMAX_AVX2 |
fd6b07be | 10475 | (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") |
d8f82f6b | 10476 | (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] |
10477 | "TARGET_SSE2" | |
10478 | { | |
10479 | operands[2] = GEN_INT (INTVAL (operands[2]) / 8); | |
10480 | |
10481 | switch (which_alternative) | |
10482 | { | |
10483 | case 0: | |
10484 | return "pslldq\t{%2, %0|%0, %2}"; | |
10485 | case 1: | |
10486 | return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; | |
10487 | default: | |
10488 | gcc_unreachable (); | |
10489 | } | |
10490 | } | |
10491 | [(set_attr "isa" "noavx,avx") | |
10492 | (set_attr "type" "sseishft") | |
10493 | (set_attr "length_immediate" "1") | |
10494 | (set_attr "prefix_data16" "1,*") | |
10495 | (set_attr "prefix" "orig,vex") | |
5deb404d | 10496 | (set_attr "mode" "<sseinsnmode>")]) |
d8f82f6b | 10497 | |
;; Expander vec_shr_<mode> for VI_128 modes (TARGET_SSE2).  The preparation
;; code reinterprets operand 1 as V1TI with gen_lowpart, allocates a fresh
;; V1TI register as operand 3 to receive the V1TI logical right shift, and
;; makes operand 4 the <MODE>-mode lowpart of operand 3, which the second
;; set copies into operand 0.  The count must satisfy
;; const_0_to_255_mul_8_operand.
41b1d2cc | 10498 | (define_expand "vec_shr_<mode>" |
09e640e6 | 10499 | [(set (match_dup 3) |
5deb404d | 10500 | (lshiftrt:V1TI |
abd4f58b | 10501 | (match_operand:VI_128 1 "register_operand") |
09e640e6 | 10502 | (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) |
10503 | (set (match_operand:VI_128 0 "register_operand") (match_dup 4))] | |
41b1d2cc | 10504 | "TARGET_SSE2" |
10505 | { | |
c98fd3f6 | 10506 | operands[1] = gen_lowpart (V1TImode, operands[1]); |
09e640e6 | 10507 | operands[3] = gen_reg_rtx (V1TImode); |
10508 | operands[4] = gen_lowpart (<MODE>mode, operands[3]); | |
41b1d2cc | 10509 | }) |
10510 | ||
;; Whole-register logical right shift for VIMAX_AVX2 modes (TARGET_SSE2).
;; The count in operand 2 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the immediate.
;; Alternative 0 ("noavx") prints the two-operand psrldq with the
;; destination tied to operand 1; alternative 1 ("avx") prints the
;; three-operand vpsrldq.
bb7ad312 | 10511 | (define_insn "<sse2_avx2>_lshr<mode>3" |
fd6b07be | 10512 | [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") |
bb7ad312 | 10513 | (lshiftrt:VIMAX_AVX2 |
fd6b07be | 10514 | (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") |
d8f82f6b | 10515 | (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] |
5802c0cb | 10516 | "TARGET_SSE2" |
d8f82f6b | 10517 | { |
10518 | operands[2] = GEN_INT (INTVAL (operands[2]) / 8); | |
5802c0cb | 10519 |
d8f82f6b | 10520 | switch (which_alternative) |
10521 | { | |
10522 | case 0: | |
10523 | return "psrldq\t{%2, %0|%0, %2}"; | |
10524 | case 1: | |
10525 | return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; | |
10526 | default: | |
10527 | gcc_unreachable (); | |
10528 | } | |
10529 | } | |
10530 | [(set_attr "isa" "noavx,avx") | |
10531 | (set_attr "type" "sseishft") | |
10532 | (set_attr "length_immediate" "1") | |
10533 | (set_attr "atom_unit" "sishuf") | |
10534 | (set_attr "prefix_data16" "1,*") | |
10535 | (set_attr "prefix" "orig,vex") | |
bb7ad312 | 10536 | (set_attr "mode" "<sseinsnmode>")]) |
10537 | ||
;; Rotate (any_rotate code iterator) of a VI48_AVX512VL vector in register
;; operand 1, where the count (operand 2) is itself a vector of the same
;; mode and may be in a register or in memory.  Enabled by TARGET_AVX512F
;; and printed as vp<rotate>v with the element-size suffix.
3d038641 | 10538 | (define_insn "<avx512>_<rotate>v<mode><mask_name>" |
10539 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
10540 | (any_rotate:VI48_AVX512VL | |
10541 | (match_operand:VI48_AVX512VL 1 "register_operand" "v") | |
10542 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
d2ff59d6 | 10543 | "TARGET_AVX512F" |
5220cab6 | 10544 | "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
d2ff59d6 | 10545 | [(set_attr "prefix" "evex") |
10546 | (set_attr "mode" "<sseinsnmode>")]) | |
10547 | ||
;; Rotate (any_rotate code iterator) of a VI48_AVX512VL vector by an SImode
;; count that must satisfy const_0_to_255_operand.  The source (operand 1)
;; may be a register or memory.  Enabled by TARGET_AVX512F.  Operand 2 is
;; written without a constraint string.
3d038641 | 10548 | (define_insn "<avx512>_<rotate><mode><mask_name>" |
10549 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
10550 | (any_rotate:VI48_AVX512VL | |
10551 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm") | |
d2ff59d6 | 10552 | (match_operand:SI 2 "const_0_to_255_operand")))] |
10553 | "TARGET_AVX512F" | |
5220cab6 | 10554 | "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
d2ff59d6 | 10555 | [(set_attr "prefix" "evex") |
10556 | (set_attr "mode" "<sseinsnmode>")]) | |
2af8cce6 | 10557 | |
;; Expander for the maxmin code iterator over VI124_256_AVX512F_AVX512BW
;; modes (TARGET_AVX2).  Both inputs may be registers or memory; the
;; preparation statement only calls ix86_fixup_binary_operands_no_copy and
;; then lets the RTL template above it be emitted.
5dd4f649 | 10558 | (define_expand "<code><mode>3" |
10559 | [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand") | |
10560 | (maxmin:VI124_256_AVX512F_AVX512BW | |
10561 | (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand") | |
10562 | (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))] | |
10563 | "TARGET_AVX2" | |
bb7ad312 | 10564 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
10565 | ||
;; Matching insn for maxmin on VI124_256 modes.  It requires TARGET_AVX2 and
;; that ix86_binary_operator_ok accepts the operands; operand 1 carries the
;; "%" modifier.  Printed as the three-operand vp<maxmin_int> form with a
;; VEX prefix and insn mode OI.
5dd4f649 | 10566 | (define_insn "*avx2_<code><mode>3" |
10567 | [(set (match_operand:VI124_256 0 "register_operand" "=v") | |
10568 | (maxmin:VI124_256 | |
10569 | (match_operand:VI124_256 1 "nonimmediate_operand" "%v") | |
10570 | (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))] | |
10571 | "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
10572 | "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
bb7ad312 | 10573 | [(set_attr "type" "sseiadd") |
10574 | (set_attr "prefix_extra" "1") | |
5dd4f649 | 10575 | (set_attr "prefix" "vex") |
bb7ad312 | 10576 | (set_attr "mode" "OI")]) |
5802c0cb | 10577 | |
;; Explicitly masked expander for maxmin on VI48_AVX512VL modes
;; (TARGET_AVX512F).  The result is a vec_merge of the maxmin of operands 1
;; and 2 with operand 3 (a vector_move_operand), selected by the mask-mode
;; register in operand 4.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy.
5dd4f649 | 10578 | (define_expand "<code><mode>3_mask" |
10579 | [(set (match_operand:VI48_AVX512VL 0 "register_operand") | |
10580 | (vec_merge:VI48_AVX512VL | |
10581 | (maxmin:VI48_AVX512VL | |
10582 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand") | |
10583 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")) | |
10584 | (match_operand:VI48_AVX512VL 3 "vector_move_operand") | |
10585 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
10586 | "TARGET_AVX512F" | |
10587 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | |
10588 | ||
;; Matching insn for maxmin on VI48_AVX512VL modes, with the <mask_name>
;; subst applied.  It requires TARGET_AVX512F and that
;; ix86_binary_operator_ok accepts the operands; operand 1 carries the "%"
;; modifier.
;; NOTE(review): the pattern name says "avx512bw" while the condition tests
;; TARGET_AVX512F; confirm whether the name is merely historical.
10589 | (define_insn "*avx512bw_<code><mode>3<mask_name>" | |
10590 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
10591 | (maxmin:VI48_AVX512VL | |
10592 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v") | |
10593 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
10594 | "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
10595 | "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
10596 | [(set_attr "type" "sseiadd") | |
10597 | (set_attr "prefix_extra" "1") | |
10598 | (set_attr "prefix" "maybe_evex") | |
10599 | (set_attr "mode" "<sseinsnmode>")]) | |
10600 | ||
;; maxmin insn for VI12_AVX512VL modes, enabled by TARGET_AVX512BW.
;; Operand 1 must be a register and operand 2 may be a register or memory;
;; there is no "%" modifier and no ix86_binary_operator_ok check in this
;; pattern.  Printed with an EVEX prefix.
b02673c1 | 10601 | (define_insn "<mask_codefor><code><mode>3<mask_name>" |
5dd4f649 | 10602 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") |
10603 | (maxmin:VI12_AVX512VL | |
10604 | (match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
10605 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
10606 | "TARGET_AVX512BW" | |
b02673c1 | 10607 | "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
10608 | [(set_attr "type" "sseiadd") | |
10609 | (set_attr "prefix" "evex") | |
10610 | (set_attr "mode" "<sseinsnmode>")]) | |
10611 | ||
;; Expander for maxmin on VI8_AVX2_AVX512BW modes (TARGET_SSE4_2).
;; When TARGET_AVX512F holds and the mode is V8DImode or TARGET_AVX512VL is
;; set, it only fixes up the operands and lets the maxmin template be
;; emitted.  Otherwise it builds a six-entry operand array for
;; ix86_expand_int_vcond: entry 0 is the destination, entries 1 and 2 are
;; operands 1 and 2 (swapped for the min codes), entry 3 is a GT comparison
;; (GTU for the unsigned codes) of operands 1 and 2, and entries 4 and 5 are
;; the compared operands.  Failure of that helper trips gcc_assert.
230eb963 | 10612 | (define_expand "<code><mode>3" |
5dd4f649 | 10613 | [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand") |
10614 | (maxmin:VI8_AVX2_AVX512BW | |
10615 | (match_operand:VI8_AVX2_AVX512BW 1 "register_operand") | |
10616 | (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))] | |
2af8cce6 | 10617 | "TARGET_SSE4_2" |
10618 | { | |
5dd4f649 | 10619 | if (TARGET_AVX512F |
10620 | && (<MODE>mode == V8DImode || TARGET_AVX512VL)) | |
10621 | ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); | |
10622 | else | |
10623 | { | |
10624 | enum rtx_code code; | |
10625 | rtx xops[6]; | |
10626 | bool ok; | |
5deb404d | 10627 |
2af8cce6 | 10628 |
5dd4f649 | 10629 | xops[0] = operands[0]; |
2af8cce6 | 10630 |
5dd4f649 | 10631 | if (<CODE> == SMAX || <CODE> == UMAX) |
10632 | { | |
10633 | xops[1] = operands[1]; | |
10634 | xops[2] = operands[2]; | |
10635 | } | |
10636 | else | |
10637 | { | |
10638 | xops[1] = operands[2]; | |
10639 | xops[2] = operands[1]; | |
10640 | } | |
2af8cce6 | 10641 |
5dd4f649 | 10642 | code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT; |
2af8cce6 | 10643 |
5dd4f649 | 10644 | xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); |
10645 | xops[4] = operands[1]; | |
10646 | xops[5] = operands[2]; | |
10647 | |
10648 | ok = ix86_expand_int_vcond (xops); | |
10649 | gcc_assert (ok); | |
10650 | DONE; | |
10651 | } | |
2af8cce6 | 10652 | }) |
10653 | ||
;; Expander for signed max/min (smaxmin) on VI124_128 modes (TARGET_SSE2).
;; With TARGET_SSE4_1, or for V8HImode, it only fixes up the operands and
;; lets the smaxmin template be emitted.  Otherwise it forces operands 1 and
;; 2 into registers and calls ix86_expand_int_vcond with: entry 0 the
;; destination, entries 1 and 2 the two inputs (swapped for SMIN), entry 3 a
;; GT comparison of operands 1 and 2, and entries 4 and 5 the compared
;; operands.  Failure of that helper trips gcc_assert.
10654 | (define_expand "<code><mode>3" | |
abd4f58b | 10655 | [(set (match_operand:VI124_128 0 "register_operand") |
885c8b76 | 10656 | (smaxmin:VI124_128 |
abd4f58b | 10657 | (match_operand:VI124_128 1 "nonimmediate_operand") |
10658 | (match_operand:VI124_128 2 "nonimmediate_operand")))] | |
2af8cce6 | 10659 | "TARGET_SSE2" |
10660 | { | |
10661 | if (TARGET_SSE4_1 || <MODE>mode == V8HImode) | |
10662 | ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); | |
10663 | else | |
10664 | { | |
10665 | rtx xops[6]; | |
10666 | bool ok; | |
10667 | |
10668 | xops[0] = operands[0]; | |
23d77b6d | 10669 | operands[1] = force_reg (<MODE>mode, operands[1]); |
10670 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
2af8cce6 | 10671 |
10672 | if (<CODE> == SMAX) | |
10673 | { | |
10674 | xops[1] = operands[1]; | |
10675 | xops[2] = operands[2]; | |
10676 | } | |
10677 | else | |
10678 | { | |
10679 | xops[1] = operands[2]; | |
10680 | xops[2] = operands[1]; | |
10681 | } | |
10682 | |
10683 | xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); | |
10684 | xops[4] = operands[1]; | |
10685 | xops[5] = operands[2]; | |
10686 | |
10687 | ok = ix86_expand_int_vcond (xops); | |
10688 | gcc_assert (ok); | |
10689 | DONE; | |
10690 | } | |
10691 | }) | |
5deb404d | 10692 | |
;; Signed max/min insn for VI14_128 modes.  It requires TARGET_SSE4_1,
;; <mask_mode512bit_condition>, and that ix86_binary_operator_ok accepts the
;; operands.  Alternatives 0 and 1 ("noavx") tie the destination to operand
;; 1 and print the two-operand form; alternative 2 ("avx") prints the
;; three-operand form with the optional <mask_operand3> suffix.
0bdab484 | 10693 | (define_insn "*sse4_1_<code><mode>3<mask_name>" |
0a32b282 | 10694 | [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v") |
d8f82f6b | 10695 | (smaxmin:VI14_128 |
0a32b282 | 10696 | (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v") |
10697 | (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))] | |
0bdab484 | 10698 | "TARGET_SSE4_1 |
10699 | && <mask_mode512bit_condition> | |
10700 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
d8f82f6b | 10701 | "@ |
0a32b282 | 10702 | p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} |
63d5e521 | 10703 | p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} |
0bdab484 | 10704 | vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
0a32b282 | 10705 | [(set_attr "isa" "noavx,noavx,avx") |
d8f82f6b | 10706 | (set_attr "type" "sseiadd") |
0a32b282 | 10707 | (set_attr "prefix_extra" "1,1,*") |
10708 | (set_attr "prefix" "orig,orig,vex") | |
1f46c1d5 | 10709 | (set_attr "mode" "TI")]) |
10710 | ||
;; Signed max/min insn for V8HI.  It requires TARGET_SSE2 and that
;; ix86_binary_operator_ok accepts the operands.  Alternative 0 ("noavx")
;; ties the destination to operand 1 and prints p<maxmin_int>w; alternative
;; 1 ("avx") prints the three-operand vp<maxmin_int>w.
9409fce7 | 10711 | (define_insn "*<code>v8hi3" |
d8f82f6b | 10712 | [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
9409fce7 | 10713 | (smaxmin:V8HI |
d8f82f6b | 10714 | (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") |
10715 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))] | |
9409fce7 | 10716 | "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)" |
d8f82f6b | 10717 | "@ |
10718 | p<maxmin_int>w\t{%2, %0|%0, %2} | |
10719 | vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}" | |
10720 | [(set_attr "isa" "noavx,avx") | |
10721 | (set_attr "type" "sseiadd") | |
10722 | (set_attr "prefix_data16" "1,*") | |
10723 | (set_attr "prefix_extra" "*,1") | |
10724 | (set_attr "prefix" "orig,vex") | |
5802c0cb | 10725 | (set_attr "mode" "TI")]) |
10726 | ||
;; Expander for unsigned max/min (umaxmin) on VI124_128 modes (TARGET_SSE2).
;; It takes one of three paths:
;;  - with TARGET_SSE4_1, or for V16QImode, it only fixes up the operands
;;    and lets the umaxmin template be emitted;
;;  - for UMAX on V8HImode it emits sse2_ussubv8hi3 (operand 1, operand 2)
;;    followed by addv8hi3 of that result and operand 2; when the
;;    destination is rtx_equal_p to operand 2 the intermediate goes to a
;;    fresh register so operand 2 is still intact for the add;
;;  - otherwise it forces both inputs into registers and calls
;;    ix86_expand_int_vcond with a GTU comparison of operands 1 and 2, the
;;    two inputs being swapped for UMIN.  Failure trips gcc_assert.
230eb963 | 10727 | (define_expand "<code><mode>3" |
abd4f58b | 10728 | [(set (match_operand:VI124_128 0 "register_operand") |
885c8b76 | 10729 | (umaxmin:VI124_128 |
abd4f58b | 10730 | (match_operand:VI124_128 1 "nonimmediate_operand") |
10731 | (match_operand:VI124_128 2 "nonimmediate_operand")))] | |
e313c83f | 10732 | "TARGET_SSE2" |
10733 | { | |
2af8cce6 | 10734 | if (TARGET_SSE4_1 || <MODE>mode == V16QImode) |
230eb963 | 10735 | ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); |
2af8cce6 | 10736 | else if (<CODE> == UMAX && <MODE>mode == V8HImode) |
10737 | { | |
10738 | rtx op0 = operands[0], op2 = operands[2], op3 = op0; | |
23d77b6d | 10739 | operands[1] = force_reg (<MODE>mode, operands[1]); |
2af8cce6 | 10740 | if (rtx_equal_p (op3, op2)) |
10741 | op3 = gen_reg_rtx (V8HImode); | |
10742 | emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2)); | |
10743 | emit_insn (gen_addv8hi3 (op0, op3, op2)); | |
10744 | DONE; | |
10745 | } | |
2d771892 | 10746 | else |
5bd1ff1d | 10747 | { |
10748 | rtx xops[6]; | |
10749 | bool ok; | |
10750 | |
23d77b6d | 10751 | operands[1] = force_reg (<MODE>mode, operands[1]); |
10752 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
10753 | |
5bd1ff1d | 10754 | xops[0] = operands[0]; |
2af8cce6 | 10755 |
10756 | if (<CODE> == UMAX) | |
10757 | { | |
10758 | xops[1] = operands[1]; | |
10759 | xops[2] = operands[2]; | |
10760 | } | |
10761 | else | |
10762 | { | |
10763 | xops[1] = operands[2]; | |
10764 | xops[2] = operands[1]; | |
10765 | } | |
10766 | |
10767 | xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); | |
5bd1ff1d | 10768 | xops[4] = operands[1]; |
10769 | xops[5] = operands[2]; | |
2af8cce6 | 10770 |
5bd1ff1d | 10771 | ok = ix86_expand_int_vcond (xops); |
10772 | gcc_assert (ok); | |
10773 | DONE; | |
10774 | } | |
41b1d2cc | 10775 | }) |
10776 | ||
;; Unsigned max/min insn for VI24_128 modes.  It requires TARGET_SSE4_1,
;; <mask_mode512bit_condition>, and that ix86_binary_operator_ok accepts the
;; operands.  Alternatives 0 and 1 ("noavx") tie the destination to operand
;; 1 and print the two-operand form; alternative 2 ("avx") prints the
;; three-operand form with the optional <mask_operand3> suffix.
0bdab484 | 10777 | (define_insn "*sse4_1_<code><mode>3<mask_name>" |
0a32b282 | 10778 | [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v") |
d8f82f6b | 10779 | (umaxmin:VI24_128 |
0a32b282 | 10780 | (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v") |
10781 | (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))] | |
0bdab484 | 10782 | "TARGET_SSE4_1 |
10783 | && <mask_mode512bit_condition> | |
10784 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
d8f82f6b | 10785 | "@ |
0a32b282 | 10786 | p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} |
63d5e521 | 10787 | p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} |
0bdab484 | 10788 | vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
0a32b282 | 10789 | [(set_attr "isa" "noavx,noavx,avx") |
d8f82f6b | 10790 | (set_attr "type" "sseiadd") |
0a32b282 | 10791 | (set_attr "prefix_extra" "1,1,*") |
10792 | (set_attr "prefix" "orig,orig,vex") | |
d8f82f6b | 10793 | (set_attr "mode" "TI")]) |
10794 | ||
;; Unsigned max/min insn for V16QI.  It requires TARGET_SSE2 and that
;; ix86_binary_operator_ok accepts the operands.  Alternative 0 ("noavx")
;; ties the destination to operand 1 and prints p<maxmin_int>b; alternative
;; 1 ("avx") prints the three-operand vp<maxmin_int>b.
10795 | (define_insn "*<code>v16qi3" | |
10796 | [(set (match_operand:V16QI 0 "register_operand" "=x,x") | |
10797 | (umaxmin:V16QI | |
10798 | (match_operand:V16QI 1 "nonimmediate_operand" "%0,x") | |
10799 | (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))] | |
10800 | "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)" | |
10801 | "@ | |
10802 | p<maxmin_int>b\t{%2, %0|%0, %2} | |
10803 | vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}" | |
10804 | [(set_attr "isa" "noavx,avx") | |
10805 | (set_attr "type" "sseiadd") | |
10806 | (set_attr "prefix_data16" "1,*") | |
10807 | (set_attr "prefix_extra" "*,1") | |
10808 | (set_attr "prefix" "orig,vex") | |
10809 | (set_attr "mode" "TI")]) | |
10810 | ||
5802c0cb | 10811 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
10812 | ;; | |
10813 | ;; Parallel integral comparisons | |
10814 | ;; | |
10815 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
10816 | ||
;; Expander for element-wise equality on VI_256 modes (TARGET_AVX2).  The
;; preparation statement only calls ix86_fixup_binary_operands_no_copy with
;; code EQ and then lets the eq template be emitted.
5deb404d | 10817 | (define_expand "avx2_eq<mode>3" |
abd4f58b | 10818 | [(set (match_operand:VI_256 0 "register_operand") |
18594fc6 | 10819 | (eq:VI_256 |
abd4f58b | 10820 | (match_operand:VI_256 1 "nonimmediate_operand") |
10821 | (match_operand:VI_256 2 "nonimmediate_operand")))] | |
5deb404d | 10822 | "TARGET_AVX2" |
10823 | "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | |
10824 | ||
;; Matching insn for equality on VI_256 modes.  It requires TARGET_AVX2 and
;; that ix86_binary_operator_ok accepts the operands; operand 1 carries the
;; "%" modifier.  Printed as the three-operand vpcmpeq form with a VEX
;; prefix and insn mode OI.
10825 | (define_insn "*avx2_eq<mode>3" | |
18594fc6 | 10826 | [(set (match_operand:VI_256 0 "register_operand" "=x") |
10827 | (eq:VI_256 | |
10828 | (match_operand:VI_256 1 "nonimmediate_operand" "%x") | |
10829 | (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] | |
5deb404d | 10830 | "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" |
10831 | "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
10832 | [(set_attr "type" "ssecmp") | |
10833 | (set_attr "prefix_extra" "1") | |
10834 | (set_attr "prefix" "vex") | |
10835 | (set_attr "mode" "OI")]) | |
10836 | ||
;; Expander for the mask-producing equality compare on VI12_AVX512VL modes
;; (TARGET_AVX512BW).  The result is a mask-mode register set from an
;; UNSPEC_MASKED_EQ of operands 1 and 2; the preparation statement only
;; calls ix86_fixup_binary_operands_no_copy with code EQ.
6b76cef2 | 10837 | (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>" |
10838 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand") | |
10839 | (unspec:<avx512fmaskmode> | |
10840 | [(match_operand:VI12_AVX512VL 1 "register_operand") | |
10841 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")] | |
10842 | UNSPEC_MASKED_EQ))] | |
10843 | "TARGET_AVX512BW" | |
10844 | "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | |
10845 | ||
;; Expander for the mask-producing equality compare on VI48_AVX512VL modes
;; (TARGET_AVX512F).  The result is a mask-mode register set from an
;; UNSPEC_MASKED_EQ of operands 1 and 2; the preparation statement only
;; calls ix86_fixup_binary_operands_no_copy with code EQ.
10846 | (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>" | |
d2ff59d6 | 10847 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand") |
10848 | (unspec:<avx512fmaskmode> | |
6b76cef2 | 10849 | [(match_operand:VI48_AVX512VL 1 "register_operand") |
10850 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")] | |
d2ff59d6 | 10851 | UNSPEC_MASKED_EQ))] |
10852 | "TARGET_AVX512F" | |
10853 | "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | |
10854 | ||
;; Mask-producing equality compare insn for byte/word element vectors
;; (VI12_AVX512VL).  It sets the mask-mode register in operand 0 from an
;; UNSPEC_MASKED_EQ of operands 1 and 2 and prints vpcmpeq with the element
;; suffix; operand 1 carries the "%" modifier and the operands must also be
;; accepted by ix86_binary_operator_ok.
;; The condition is gated on TARGET_AVX512BW rather than TARGET_AVX512F so
;; that it agrees with the VI12_AVX512VL expander that generates this insn
;; and with the VI12_AVX512VL masked greater-than insn, both of which
;; require TARGET_AVX512BW.
6b76cef2 | 10855 | (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1" |
a31e7f46 | 10856 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
d2ff59d6 | 10857 | (unspec:<avx512fmaskmode> |
6b76cef2 | 10858 | [(match_operand:VI12_AVX512VL 1 "register_operand" "%v") |
10859 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] | |
10860 | UNSPEC_MASKED_EQ))] | |
10861 | "TARGET_AVX512BW && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" | |
10862 | "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
10863 | [(set_attr "type" "ssecmp") | |
10864 | (set_attr "prefix_extra" "1") | |
10865 | (set_attr "prefix" "evex") | |
10866 | (set_attr "mode" "<sseinsnmode>")]) | |
10867 | ||
;; Mask-producing equality compare insn for VI48_AVX512VL modes.  It sets
;; the mask-mode register in operand 0 from an UNSPEC_MASKED_EQ of operands
;; 1 and 2 and prints vpcmpeq with the element suffix.  It requires
;; TARGET_AVX512F and that ix86_binary_operator_ok accepts the operands;
;; operand 1 carries the "%" modifier.
10868 | (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1" | |
10869 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
10870 | (unspec:<avx512fmaskmode> | |
10871 | [(match_operand:VI48_AVX512VL 1 "register_operand" "%v") | |
10872 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] | |
d2ff59d6 | 10873 | UNSPEC_MASKED_EQ))] |
10874 | "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" | |
c3d9b089 | 10875 | "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" |
d2ff59d6 | 10876 | [(set_attr "type" "ssecmp") |
10877 | (set_attr "prefix_extra" "1") | |
10878 | (set_attr "prefix" "evex") | |
10879 | (set_attr "mode" "<sseinsnmode>")]) | |
10880 | ||
;; Equality compare insn for V2DI.  It requires TARGET_SSE4_1 and that
;; ix86_binary_operator_ok accepts the operands.  Alternatives 0 and 1
;; ("noavx") tie the destination to operand 1 and print pcmpeqq; alternative
;; 2 ("avx") prints the three-operand vpcmpeqq.
d8f82f6b | 10881 | (define_insn "*sse4_1_eqv2di3" |
0a32b282 | 10882 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") |
d8f82f6b | 10883 | (eq:V2DI |
0a32b282 | 10884 | (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x") |
10885 | (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))] | |
d8f82f6b | 10886 | "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)" |
10887 | "@ | |
0a32b282 | 10888 | pcmpeqq\t{%2, %0|%0, %2} |
d8f82f6b | 10889 | pcmpeqq\t{%2, %0|%0, %2} |
10890 | vpcmpeqq\t{%2, %1, %0|%0, %1, %2}" | |
0a32b282 | 10891 | [(set_attr "isa" "noavx,noavx,avx") |
d8f82f6b | 10892 | (set_attr "type" "ssecmp") |
10893 | (set_attr "prefix_extra" "1") | |
0a32b282 | 10894 | (set_attr "prefix" "orig,orig,vex") |
ed30e0a6 | 10895 | (set_attr "mode" "TI")]) |
10896 | ||
;; Equality compare insn for VI124_128 modes.  It requires TARGET_SSE2, is
;; disabled when TARGET_XOP is set, and needs ix86_binary_operator_ok to
;; accept the operands.  Alternative 0 ("noavx") ties the destination to
;; operand 1 and prints pcmpeq; alternative 1 ("avx") prints the
;; three-operand vpcmpeq.
7c839b3f | 10897 | (define_insn "*sse2_eq<mode>3" |
d8f82f6b | 10898 | [(set (match_operand:VI124_128 0 "register_operand" "=x,x") |
10899 | (eq:VI124_128 | |
10900 | (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x") | |
10901 | (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] | |
18525343 | 10902 | "TARGET_SSE2 && !TARGET_XOP |
448e99f5 | 10903 | && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" |
d8f82f6b | 10904 | "@ |
63d5e521 | 10905 | pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2} |
10906 | vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
d8f82f6b | 10907 | [(set_attr "isa" "noavx,avx") |
10908 | (set_attr "type" "ssecmp") | |
10909 | (set_attr "prefix_data16" "1,*") | |
10910 | (set_attr "prefix" "orig,vex") | |
5802c0cb | 10911 | (set_attr "mode" "TI")]) |
10912 | ||
;; Expander for equality on VI124_128 modes, enabled by TARGET_SSE2 when
;; TARGET_XOP is not set.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy with code EQ and then lets the eq
;; template be emitted.
d8f82f6b | 10913 | (define_expand "sse2_eq<mode>3" |
abd4f58b | 10914 | [(set (match_operand:VI124_128 0 "register_operand") |
d8f82f6b | 10915 | (eq:VI124_128 |
abd4f58b | 10916 | (match_operand:VI124_128 1 "nonimmediate_operand") |
10917 | (match_operand:VI124_128 2 "nonimmediate_operand")))] | |
d8f82f6b | 10918 | "TARGET_SSE2 && !TARGET_XOP " |
10919 | "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | |
10920 | ||
;; Expander for equality on V2DI (TARGET_SSE4_1).  The preparation statement
;; only calls ix86_fixup_binary_operands_no_copy with code EQ and then lets
;; the eq template be emitted.
7c839b3f | 10921 | (define_expand "sse4_1_eqv2di3" |
abd4f58b | 10922 | [(set (match_operand:V2DI 0 "register_operand") |
7c839b3f | 10923 | (eq:V2DI |
abd4f58b | 10924 | (match_operand:V2DI 1 "nonimmediate_operand") |
10925 | (match_operand:V2DI 2 "nonimmediate_operand")))] | |
7c839b3f | 10926 | "TARGET_SSE4_1" |
10927 | "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);") | |
10928 | ||
;; Signed greater-than compare insn for V2DI (TARGET_SSE4_2).  Operand 1
;; must be a register and has no "%" modifier.  Alternatives 0 and 1
;; ("noavx") tie the destination to operand 1 and print pcmpgtq; alternative
;; 2 ("avx") prints the three-operand vpcmpgtq.
d8f82f6b | 10929 | (define_insn "sse4_2_gtv2di3" |
0a32b282 | 10930 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") |
d8f82f6b | 10931 | (gt:V2DI |
0a32b282 | 10932 | (match_operand:V2DI 1 "register_operand" "0,0,x") |
10933 | (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))] | |
d8f82f6b | 10934 | "TARGET_SSE4_2" |
10935 | "@ | |
0a32b282 | 10936 | pcmpgtq\t{%2, %0|%0, %2} |
d8f82f6b | 10937 | pcmpgtq\t{%2, %0|%0, %2} |
10938 | vpcmpgtq\t{%2, %1, %0|%0, %1, %2}" | |
0a32b282 | 10939 | [(set_attr "isa" "noavx,noavx,avx") |
d8f82f6b | 10940 | (set_attr "type" "ssecmp") |
2d771892 | 10941 | (set_attr "prefix_extra" "1") |
0a32b282 | 10942 | (set_attr "prefix" "orig,orig,vex") |
ed30e0a6 | 10943 | (set_attr "mode" "TI")]) |
10944 | ||
;; Signed greater-than compare insn for VI_256 modes (TARGET_AVX2).
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; Printed as the three-operand vpcmpgt form with a VEX prefix and insn
;; mode OI.
5deb404d | 10945 | (define_insn "avx2_gt<mode>3" |
18594fc6 | 10946 | [(set (match_operand:VI_256 0 "register_operand" "=x") |
10947 | (gt:VI_256 | |
10948 | (match_operand:VI_256 1 "register_operand" "x") | |
10949 | (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] | |
5deb404d | 10950 | "TARGET_AVX2" |
10951 | "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
10952 | [(set_attr "type" "ssecmp") | |
10953 | (set_attr "prefix_extra" "1") | |
10954 | (set_attr "prefix" "vex") | |
10955 | (set_attr "mode" "OI")]) | |
10956 | ||
;; Mask-producing greater-than compare insn for VI48_AVX512VL modes
;; (TARGET_AVX512F).  It sets the mask-mode register in operand 0 from an
;; UNSPEC_MASKED_GT of register operand 1 and register-or-memory operand 2,
;; and prints vpcmpgt with the element suffix and an EVEX prefix.
6b76cef2 | 10957 | (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>" |
a31e7f46 | 10958 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
d2ff59d6 | 10959 | (unspec:<avx512fmaskmode> |
6b76cef2 | 10960 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v") |
10961 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))] | |
d2ff59d6 | 10962 | "TARGET_AVX512F" |
c3d9b089 | 10963 | "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" |
d2ff59d6 | 10964 | [(set_attr "type" "ssecmp") |
10965 | (set_attr "prefix_extra" "1") | |
10966 | (set_attr "prefix" "evex") | |
10967 | (set_attr "mode" "<sseinsnmode>")]) | |
10968 | ||
;; Mask-producing greater-than compare insn for VI12_AVX512VL modes
;; (TARGET_AVX512BW).  It sets the mask-mode register in operand 0 from an
;; UNSPEC_MASKED_GT of register operand 1 and register-or-memory operand 2,
;; and prints vpcmpgt with the element suffix and an EVEX prefix.
6b76cef2 | 10969 | (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>" |
10970 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
10971 | (unspec:<avx512fmaskmode> | |
10972 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
10973 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))] | |
10974 | "TARGET_AVX512BW" | |
10975 | "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
10976 | [(set_attr "type" "ssecmp") | |
10977 | (set_attr "prefix_extra" "1") | |
10978 | (set_attr "prefix" "evex") | |
10979 | (set_attr "mode" "<sseinsnmode>")]) | |
10980 | ||
;; Signed greater-than compare insn for VI124_128 modes, enabled by
;; TARGET_SSE2 when TARGET_XOP is not set.  Operand 1 must be a register and
;; has no "%" modifier.  Alternative 0 ("noavx") ties the destination to
;; operand 1 and prints pcmpgt; alternative 1 ("avx") prints the
;; three-operand vpcmpgt.
5802c0cb | 10981 | (define_insn "sse2_gt<mode>3" |
d8f82f6b | 10982 | [(set (match_operand:VI124_128 0 "register_operand" "=x,x") |
10983 | (gt:VI124_128 | |
10984 | (match_operand:VI124_128 1 "register_operand" "0,x") | |
10985 | (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] | |
18525343 | 10986 | "TARGET_SSE2 && !TARGET_XOP" |
d8f82f6b | 10987 | "@ |
63d5e521 | 10988 | pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2} |
10989 | vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
d8f82f6b | 10990 | [(set_attr "isa" "noavx,avx") |
10991 | (set_attr "type" "ssecmp") | |
10992 | (set_attr "prefix_data16" "1,*") | |
10993 | (set_attr "prefix" "orig,vex") | |
f25d51c3 | 10994 | (set_attr "mode" "TI")]) |
10995 | ||
;; Vector conditional-select expander pairing a V_512 data mode with a
;; VI_512 comparison mode.  Operand 0 receives an if_then_else that picks
;; between operands 1 and 2 according to operator 3 applied to operands 4
;; and 5.  It is enabled under TARGET_AVX512F only when both modes have the
;; same GET_MODE_NUNITS.  All work is delegated to ix86_expand_int_vcond,
;; whose failure trips gcc_assert.
f23a3158 | 10996 | (define_expand "vcond<V_512:mode><VI_512:mode>" |
10997 | [(set (match_operand:V_512 0 "register_operand") | |
10998 | (if_then_else:V_512 | |
10999 | (match_operator 3 "" | |
11000 | [(match_operand:VI_512 4 "nonimmediate_operand") | |
11001 | (match_operand:VI_512 5 "general_operand")]) | |
11002 | (match_operand:V_512 1) | |
11003 | (match_operand:V_512 2)))] | |
11004 | "TARGET_AVX512F | |
11005 | && (GET_MODE_NUNITS (<V_512:MODE>mode) | |
11006 | == GET_MODE_NUNITS (<VI_512:MODE>mode))" | |
11007 | { | |
11008 | bool ok = ix86_expand_int_vcond (operands); | |
11009 | gcc_assert (ok); | |
11010 | DONE; | |
11011 | }) | |
11012 | ||
;; Vector conditional select for the V_256 data modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; The VI_256 comparison mode may differ from the data mode as long as the
;; element counts match.  Requires AVX2.  Expansion is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
230eb963 | 11013 | (define_expand "vcond<V_256:mode><VI_256:mode>" |
abd4f58b | 11014 | [(set (match_operand:V_256 0 "register_operand") |
230eb963 | 11015 | (if_then_else:V_256 |
11016 | (match_operator 3 "" | |
abd4f58b | 11017 | [(match_operand:VI_256 4 "nonimmediate_operand") |
11018 | (match_operand:VI_256 5 "general_operand")]) | |
11019 | (match_operand:V_256 1) | |
11020 | (match_operand:V_256 2)))] | |
230eb963 | 11021 | "TARGET_AVX2 |
11022 | && (GET_MODE_NUNITS (<V_256:MODE>mode) | |
11023 | == GET_MODE_NUNITS (<VI_256:MODE>mode))" | |
11024 | { | |
11025 | bool ok = ix86_expand_int_vcond (operands); | |
11026 | gcc_assert (ok); | |
11027 | DONE; | |
11028 | }) | |
11029 | ||
;; Vector conditional select for the V_128 data modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; The comparison is done in a VI124_128 mode with the same element count as
;; the data mode.  Requires SSE2.  Expansion is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
d6b19f6b | 11030 | (define_expand "vcond<V_128:mode><VI124_128:mode>" |
abd4f58b | 11031 | [(set (match_operand:V_128 0 "register_operand") |
d6b19f6b | 11032 | (if_then_else:V_128 |
5deb404d | 11033 | (match_operator 3 "" |
abd4f58b | 11034 | [(match_operand:VI124_128 4 "nonimmediate_operand") |
11035 | (match_operand:VI124_128 5 "general_operand")]) | |
11036 | (match_operand:V_128 1) | |
11037 | (match_operand:V_128 2)))] | |
d6b19f6b | 11038 | "TARGET_SSE2 |
11039 | && (GET_MODE_NUNITS (<V_128:MODE>mode) | |
11040 | == GET_MODE_NUNITS (<VI124_128:MODE>mode))" | |
76405cce | 11041 | { |
17e313b0 | 11042 | bool ok = ix86_expand_int_vcond (operands); |
11043 | gcc_assert (ok); | |
11044 | DONE; | |
76405cce | 11045 | }) |
11046 | ||
;; Vector conditional select whose comparison operands (4 and 5) are V2DI
;; and whose data operands are any VI8F_128 mode.  Unlike the other vcond
;; expanders there is no element-count test in the condition; the pattern is
;; gated on SSE4.2 only.  Expansion is delegated to ix86_expand_int_vcond,
;; which is asserted to succeed.
d6b19f6b | 11047 | (define_expand "vcond<VI8F_128:mode>v2di" |
abd4f58b | 11048 | [(set (match_operand:VI8F_128 0 "register_operand") |
d6b19f6b | 11049 | (if_then_else:VI8F_128 |
5deb404d | 11050 | (match_operator 3 "" |
abd4f58b | 11051 | [(match_operand:V2DI 4 "nonimmediate_operand") |
11052 | (match_operand:V2DI 5 "general_operand")]) | |
11053 | (match_operand:VI8F_128 1) | |
11054 | (match_operand:VI8F_128 2)))] | |
d8f82f6b | 11055 | "TARGET_SSE4_2" |
11056 | { | |
11057 | bool ok = ix86_expand_int_vcond (operands); | |
11058 | gcc_assert (ok); | |
11059 | DONE; | |
11060 | }) | |
11061 | ||
;; vcondu counterpart of the V_512 vcond expander (per GCC's standard pattern
;; names, vcondu is the variant used for unsigned comparisons).  Here both
;; comparison operands are nonimmediate_operand and the two data operands
;; are general_operand.  Requires AVX512F and equal element counts in the
;; data and comparison modes.  Expansion is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
f23a3158 | 11062 | (define_expand "vcondu<V_512:mode><VI_512:mode>" |
11063 | [(set (match_operand:V_512 0 "register_operand") | |
11064 | (if_then_else:V_512 | |
11065 | (match_operator 3 "" | |
11066 | [(match_operand:VI_512 4 "nonimmediate_operand") | |
11067 | (match_operand:VI_512 5 "nonimmediate_operand")]) | |
11068 | (match_operand:V_512 1 "general_operand") | |
11069 | (match_operand:V_512 2 "general_operand")))] | |
11070 | "TARGET_AVX512F | |
11071 | && (GET_MODE_NUNITS (<V_512:MODE>mode) | |
11072 | == GET_MODE_NUNITS (<VI_512:MODE>mode))" | |
11073 | { | |
11074 | bool ok = ix86_expand_int_vcond (operands); | |
11075 | gcc_assert (ok); | |
11076 | DONE; | |
11077 | }) | |
11078 | ||
;; vcondu counterpart of the V_256 vcond expander (per GCC's standard pattern
;; names, vcondu is the variant used for unsigned comparisons).  Both
;; comparison operands are nonimmediate_operand; the data operands are
;; general_operand.  Requires AVX2 and equal element counts in the data and
;; comparison modes.  Expansion is delegated to ix86_expand_int_vcond, which
;; is asserted to succeed.
230eb963 | 11079 | (define_expand "vcondu<V_256:mode><VI_256:mode>" |
abd4f58b | 11080 | [(set (match_operand:V_256 0 "register_operand") |
230eb963 | 11081 | (if_then_else:V_256 |
11082 | (match_operator 3 "" | |
abd4f58b | 11083 | [(match_operand:VI_256 4 "nonimmediate_operand") |
11084 | (match_operand:VI_256 5 "nonimmediate_operand")]) | |
11085 | (match_operand:V_256 1 "general_operand") | |
11086 | (match_operand:V_256 2 "general_operand")))] | |
230eb963 | 11087 | "TARGET_AVX2 |
11088 | && (GET_MODE_NUNITS (<V_256:MODE>mode) | |
11089 | == GET_MODE_NUNITS (<VI_256:MODE>mode))" | |
11090 | { | |
11091 | bool ok = ix86_expand_int_vcond (operands); | |
11092 | gcc_assert (ok); | |
11093 | DONE; | |
11094 | }) | |
11095 | ||
;; vcondu counterpart of the V_128 vcond expander (per GCC's standard pattern
;; names, vcondu is the variant used for unsigned comparisons).  Both
;; comparison operands are nonimmediate_operand; the data operands are
;; general_operand.  Requires SSE2 and equal element counts in the data and
;; comparison modes.  Expansion is delegated to ix86_expand_int_vcond, which
;; is asserted to succeed.
d6b19f6b | 11096 | (define_expand "vcondu<V_128:mode><VI124_128:mode>" |
abd4f58b | 11097 | [(set (match_operand:V_128 0 "register_operand") |
d6b19f6b | 11098 | (if_then_else:V_128 |
5deb404d | 11099 | (match_operator 3 "" |
abd4f58b | 11100 | [(match_operand:VI124_128 4 "nonimmediate_operand") |
11101 | (match_operand:VI124_128 5 "nonimmediate_operand")]) | |
11102 | (match_operand:V_128 1 "general_operand") | |
11103 | (match_operand:V_128 2 "general_operand")))] | |
d6b19f6b | 11104 | "TARGET_SSE2 |
11105 | && (GET_MODE_NUNITS (<V_128:MODE>mode) | |
11106 | == GET_MODE_NUNITS (<VI124_128:MODE>mode))" | |
76405cce | 11107 | { |
17e313b0 | 11108 | bool ok = ix86_expand_int_vcond (operands); |
11109 | gcc_assert (ok); | |
11110 | DONE; | |
76405cce | 11111 | }) |
11112 | ||
;; vcondu variant whose comparison operands (4 and 5) are V2DI and whose
;; data operands are any VI8F_128 mode.  Gated on SSE4.2 only; there is no
;; element-count test in the condition.  Expansion is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
d6b19f6b | 11113 | (define_expand "vcondu<VI8F_128:mode>v2di" |
abd4f58b | 11114 | [(set (match_operand:VI8F_128 0 "register_operand") |
d6b19f6b | 11115 | (if_then_else:VI8F_128 |
5deb404d | 11116 | (match_operator 3 "" |
abd4f58b | 11117 | [(match_operand:V2DI 4 "nonimmediate_operand") |
11118 | (match_operand:V2DI 5 "nonimmediate_operand")]) | |
11119 | (match_operand:VI8F_128 1 "general_operand") | |
11120 | (match_operand:VI8F_128 2 "general_operand")))] | |
d8f82f6b | 11121 | "TARGET_SSE4_2" |
11122 | { | |
11123 | bool ok = ix86_expand_int_vcond (operands); | |
11124 | gcc_assert (ok); | |
11125 | DONE; | |
11126 | }) | |
11127 | ||
;; Modes iterated over by the vec_perm<mode> expander.  The six 128-bit modes
;; carry no per-mode condition; the 256-bit modes require AVX2; V16SF, V8DF,
;; V16SI and V8DI require AVX512F; V32HI requires AVX512BW; V64QI requires
;; AVX512VBMI.
f4803722 | 11128 | (define_mode_iterator VEC_PERM_AVX2 |
12cbfa26 | 11129 | [V16QI V8HI V4SI V2DI V4SF V2DF |
a9e4de7b | 11130 | (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") |
12cbfa26 | 11131 | (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") |
697a43f8 | 11132 | (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") |
11133 | (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") | |
201f262d | 11134 | (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") |
447dd191 | 11135 | (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")]) |
12cbfa26 | 11136 | |
;; Variable vector permutation for the VEC_PERM_AVX2 modes.  Operands 0-2
;; are registers in the vector mode; operand 3 is a register in the
;; corresponding integer vector mode (<sseintvecmode>).  Available when any
;; of SSSE3, AVX or XOP is enabled.  Expansion is done entirely by
;; ix86_expand_vec_perm; this expander never FAILs.
f4803722 | 11137 | (define_expand "vec_perm<mode>" |
abd4f58b | 11138 | [(match_operand:VEC_PERM_AVX2 0 "register_operand") |
11139 | (match_operand:VEC_PERM_AVX2 1 "register_operand") | |
11140 | (match_operand:VEC_PERM_AVX2 2 "register_operand") | |
11141 | (match_operand:<sseintvecmode> 3 "register_operand")] | |
12cbfa26 | 11142 | "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP" |
6cf89e04 | 11143 | { |
f4803722 | 11144 | ix86_expand_vec_perm (operands); |
6cf89e04 | 11145 | DONE; |
11146 | }) | |
11147 | ||
;; Modes iterated over by the vec_perm_const<mode> expander, each with its
;; own enabling condition: V4SF/V4SI/V2DF/V2DI need SSE; V16QI/V8HI need
;; SSE2; V8SF/V4DF/V8SI/V4DI need AVX; V32QI/V16HI need AVX2; the 512-bit
;; SI/DI/SF/DF modes need AVX512F; V32HI and V64QI need AVX512BW.
6ae3cabe | 11148 | (define_mode_iterator VEC_PERM_CONST |
11149 | [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE") | |
11150 | (V2DF "TARGET_SSE") (V2DI "TARGET_SSE") | |
11151 | (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2") | |
11152 | (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") | |
11153 | (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") | |
697a43f8 | 11154 | (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") |
11155 | (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") | |
201f262d | 11156 | (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") |
271c02e8 | 11157 | (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) |
6ae3cabe | 11158 | |
;; Vector permutation for the VEC_PERM_CONST modes.  Operand 3 (mode
;; <sseintvecmode>) has no predicate.  The expander's own condition is
;; empty, so availability comes solely from the per-mode conditions of the
;; iterator.  The expander succeeds (DONE) when ix86_expand_vec_perm_const
;; returns nonzero and FAILs otherwise.
11159 | (define_expand "vec_perm_const<mode>" | |
abd4f58b | 11160 | [(match_operand:VEC_PERM_CONST 0 "register_operand") |
11161 | (match_operand:VEC_PERM_CONST 1 "register_operand") | |
11162 | (match_operand:VEC_PERM_CONST 2 "register_operand") | |
11163 | (match_operand:<sseintvecmode> 3)] | |
6ae3cabe | 11164 | "" |
11165 | { | |
11166 | if (ix86_expand_vec_perm_const (operands)) | |
11167 | DONE; | |
11168 | else | |
11169 | FAIL; | |
11170 | }) | |
11171 | ||
5802c0cb | 11172 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
11173 | ;; | |
349d9d0e | 11174 | ;; Parallel bitwise logical operations |
5802c0cb | 11175 | ;; |
11176 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
11177 | ||
;; One's complement of an integer vector (VI modes), implemented as an XOR
;; with an all-ones vector.  The preparation code builds a CONST_VECTOR with
;; GET_MODE_NUNITS elements, each constm1_rtx, forces it into a register and
;; installs it as operand 2, which the template references via match_dup.
;; Requires SSE.
11178 | (define_expand "one_cmpl<mode>2" | |
abd4f58b | 11179 | [(set (match_operand:VI 0 "register_operand") |
11180 | (xor:VI (match_operand:VI 1 "nonimmediate_operand") | |
ba2558f8 | 11181 | (match_dup 2)))] |
11182 | "TARGET_SSE" | |
5802c0cb | 11183 | { |
11184 | int i, n = GET_MODE_NUNITS (<MODE>mode); | |
11185 | rtvec v = rtvec_alloc (n); | |
11186 | ||
11187 | for (i = 0; i < n; ++i) | |
11188 | RTVEC_ELT (v, i) = constm1_rtx; | |
11189 | ||
11190 | operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); | |
11191 | }) | |
11192 | ||
;; Named expander for and-not on VI_AVX2 modes:
;;   operand 0 = (~operand 1) & operand 2.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; There is no preparation code, so the expander simply emits the RTL
;; template.  Requires SSE2.
12803fe0 | 11193 | (define_expand "<sse2_avx2>_andnot<mode>3" |
abd4f58b | 11194 | [(set (match_operand:VI_AVX2 0 "register_operand") |
c4530783 | 11195 | (and:VI_AVX2 |
abd4f58b | 11196 | (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand")) |
11197 | (match_operand:VI_AVX2 2 "nonimmediate_operand")))] | |
12803fe0 | 11198 | "TARGET_SSE2") |
3a950715 | 11199 | |
;; Masked and-not expander for VI48_AVX512VL modes: (~operand 1) & operand 2
;; is combined with operand 3 by a vec_merge controlled by mask operand 4
;; (mode <avx512fmaskmode>).  No preparation code; requires AVX512F.
12803fe0 | 11200 | (define_expand "<sse2_avx2>_andnot<mode>3_mask" |
11201 | [(set (match_operand:VI48_AVX512VL 0 "register_operand") | |
11202 | (vec_merge:VI48_AVX512VL | |
11203 | (and:VI48_AVX512VL | |
11204 | (not:VI48_AVX512VL | |
11205 | (match_operand:VI48_AVX512VL 1 "register_operand")) | |
11206 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")) | |
11207 | (match_operand:VI48_AVX512VL 3 "vector_move_operand") | |
11208 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
11209 | "TARGET_AVX512F") | |
11210 | ||
;; Masked and-not expander for VI12_AVX512VL modes: (~operand 1) & operand 2
;; is combined with operand 3 by a vec_merge controlled by mask operand 4
;; (mode <avx512fmaskmode>).  No preparation code; requires AVX512BW.
11211 | (define_expand "<sse2_avx2>_andnot<mode>3_mask" | |
11212 | [(set (match_operand:VI12_AVX512VL 0 "register_operand") | |
11213 | (vec_merge:VI12_AVX512VL | |
11214 | (and:VI12_AVX512VL | |
11215 | (not:VI12_AVX512VL | |
11216 | (match_operand:VI12_AVX512VL 1 "register_operand")) | |
11217 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")) | |
11218 | (match_operand:VI12_AVX512VL 3 "vector_move_operand") | |
11219 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
11220 | "TARGET_AVX512BW") | |
11221 | ||
;; And-not insn for all VI modes: operand 0 = (~operand 1) & operand 2.
;;
;; The C block builds the assembler template in a static buffer:
;;  * The mnemonic is chosen from the insn's "mode" attribute.  For the
;;    integer modes (XI/OI/TI) it is "pandn<ssemodesuffix>" for V16SI/V8DI
;;    when AVX512F is enabled and for the 256/128-bit SI/DI modes when
;;    AVX512VL is enabled; otherwise "pandnq" with AVX512VL or plain "pandn".
;;    For the float modes (V16SF/V8SF/V4SF) it is "andnps".
;;  * The outer case labels fall through, so a wider mode also executes the
;;    assertions of the narrower ones; the inner switch likewise falls
;;    through when the ISA test of a case fails.
;;  * Alternative 0 uses the two-operand form (operand 1 tied to operand 0);
;;    alternative 1 prepends "v" and uses the three-operand form.
;;
;; The "mode" attribute is computed as: <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL; else <sseinsnmode> with AVX2; else,
;; with AVX, V8SF for modes wider than 16 bytes and <sseinsnmode> otherwise;
;; else V4SF without SSE2 or when optimizing for size; else <sseinsnmode>.
;;
;; NOTE(review): the alternative-1 template references <mask_operand3_1>
;; although this pattern's name carries no <mask_name>; confirm that this is
;; intended.
11222 | (define_insn "*andnot<mode>3" | |
e13e1b39 | 11223 | [(set (match_operand:VI 0 "register_operand" "=x,v") |
ba2558f8 | 11224 | (and:VI |
e13e1b39 | 11225 | (not:VI (match_operand:VI 1 "register_operand" "0,v")) |
11226 | (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] | |
12803fe0 | 11227 | "TARGET_SSE" |
ba2558f8 | 11228 | { |
03ae25dc | 11229 | static char buf[64]; |
ba2558f8 | 11230 | const char *ops; |
c4530783 | 11231 | const char *tmp; |
11232 | ||
11233 | switch (get_attr_mode (insn)) | |
11234 | { | |
03ae25dc | 11235 | case MODE_XI: |
11236 | gcc_assert (TARGET_AVX512F); | |
c4530783 | 11237 | case MODE_OI: |
0607f34b | 11238 | gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); |
c4530783 | 11239 | case MODE_TI: |
0607f34b | 11240 | gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); |
11241 | switch (<MODE>mode) | |
11242 | { | |
11243 | case V16SImode: | |
11244 | case V8DImode: | |
11245 | if (TARGET_AVX512F) | |
11246 | { | |
11247 | tmp = "pandn<ssemodesuffix>"; | |
11248 | break; | |
11249 | } | |
11250 | case V8SImode: | |
11251 | case V4DImode: | |
11252 | case V4SImode: | |
11253 | case V2DImode: | |
11254 | if (TARGET_AVX512VL) | |
11255 | { | |
11256 | tmp = "pandn<ssemodesuffix>"; | |
11257 | break; | |
11258 | } | |
11259 | default: | |
11260 | tmp = TARGET_AVX512VL ? "pandnq" : "pandn"; | |
11261 | } | |
c4530783 | 11262 | break; |
11263 | ||
f5d830da | 11264 | case MODE_V16SF: |
11265 | gcc_assert (TARGET_AVX512F); | |
c4530783 | 11266 | case MODE_V8SF: |
11267 | gcc_assert (TARGET_AVX); | |
11268 | case MODE_V4SF: | |
11269 | gcc_assert (TARGET_SSE); | |
11270 | ||
11271 | tmp = "andnps"; | |
11272 | break; | |
11273 | ||
11274 | default: | |
11275 | gcc_unreachable (); | |
11276 | } | |
ed30e0a6 | 11277 |
ba2558f8 | 11278 | switch (which_alternative) |
11279 | { | |
11280 | case 0: | |
11281 | ops = "%s\t{%%2, %%0|%%0, %%2}"; | |
11282 | break; | |
11283 | case 1: | |
5220cab6 | 11284 | ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; |
ba2558f8 | 11285 | break; |
11286 | default: | |
11287 | gcc_unreachable (); | |
11288 | } | |
5802c0cb | 11289 |
ba2558f8 | 11290 | snprintf (buf, sizeof (buf), ops, tmp); |
11291 | return buf; | |
11292 | } | |
11293 | [(set_attr "isa" "noavx,avx") | |
11294 | (set_attr "type" "sselog") | |
11295 | (set (attr "prefix_data16") | |
11296 | (if_then_else | |
11297 | (and (eq_attr "alternative" "0") | |
11298 | (eq_attr "mode" "TI")) | |
11299 | (const_string "1") | |
11300 | (const_string "*"))) | |
12803fe0 | 11301 | (set_attr "prefix" "orig,vex") |
ba2558f8 | 11302 | (set (attr "mode") |
7d460314 | 11303 | (cond [(and (match_test "<MODE_SIZE> == 16") |
11304 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
b1756286 | 11305 | (const_string "<ssePSmode>") |
11306 | (match_test "TARGET_AVX2") | |
11307 | (const_string "<sseinsnmode>") | |
11308 | (match_test "TARGET_AVX") | |
11309 | (if_then_else | |
ca94bc0d | 11310 | (match_test "<MODE_SIZE> > 16") |
b1756286 | 11311 | (const_string "V8SF") |
11312 | (const_string "<sseinsnmode>")) | |
11313 | (ior (not (match_test "TARGET_SSE2")) | |
11314 | (match_test "optimize_function_for_size_p (cfun)")) | |
11315 | (const_string "V4SF") | |
11316 | ] | |
11317 | (const_string "<sseinsnmode>")))]) | |
349d9d0e | 11318 | |
;; Masked and-not insn for VI48_AVX512VL modes: (~operand 1) & operand 2 is
;; merged with operand 3 under mask operand 4 (vec_merge).  Operand 3 uses
;; constraints "0C", so one of its options is to be tied to the destination.
;; Emits vpandn<ssemodesuffix> with the mask operand printed in braces and
;; the %N3 modifier applied to operand 3.  Requires AVX512F.
12803fe0 | 11319 | (define_insn "*andnot<mode>3_mask" |
11320 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
11321 | (vec_merge:VI48_AVX512VL | |
11322 | (and:VI48_AVX512VL | |
11323 | (not:VI48_AVX512VL | |
11324 | (match_operand:VI48_AVX512VL 1 "register_operand" "v")) | |
11325 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")) | |
11326 | (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C") | |
11327 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
11328 | "TARGET_AVX512F" | |
11329 | "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
11330 | [(set_attr "type" "sselog") | |
11331 | (set_attr "prefix" "evex") | |
11332 | (set_attr "mode" "<sseinsnmode>")]) | |
11333 | ||
;; Masked and-not insn for VI12_AVX512VL modes: (~operand 1) & operand 2 is
;; merged with operand 3 under mask operand 4 (vec_merge).  Operand 3 uses
;; constraints "0C", so one of its options is to be tied to the destination.
;; Emits vpandn<ssemodesuffix> with the mask operand printed in braces and
;; the %N3 modifier applied to operand 3.  Requires AVX512BW.
11334 | (define_insn "*andnot<mode>3_mask" | |
11335 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
11336 | (vec_merge:VI12_AVX512VL | |
11337 | (and:VI12_AVX512VL | |
11338 | (not:VI12_AVX512VL | |
11339 | (match_operand:VI12_AVX512VL 1 "register_operand" "v")) | |
11340 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")) | |
11341 | (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C") | |
11342 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
11343 | "TARGET_AVX512BW" | |
11344 | "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
11345 | [(set_attr "type" "sselog") | |
11346 | (set_attr "prefix" "evex") | |
11347 | (set_attr "mode" "<sseinsnmode>")]) | |
11348 | ||
;; Expander for the any_logic operations on all VI modes.  Both inputs may
;; be registers, memory or constant vectors
;; (nonimmediate_or_const_vector_operand).  The RTL template is not emitted
;; directly: ix86_expand_vector_logical_operator generates the insns and the
;; expander then finishes with DONE.  Requires SSE.
b6bc2701 | 11349 | (define_expand "<code><mode>3" |
abd4f58b | 11350 | [(set (match_operand:VI 0 "register_operand") |
ba2558f8 | 11351 | (any_logic:VI |
3a623316 | 11352 | (match_operand:VI 1 "nonimmediate_or_const_vector_operand") |
11353 | (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))] | |
3a950715 | 11354 | "TARGET_SSE" |
3a623316 | 11355 | { |
11356 | ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands); | |
11357 | DONE; | |
11358 | }) | |
5802c0cb | 11359 | |
;; Bitwise logic (any_logic) on the VI48_AVX_AVX512F modes, with an optional
;; masked form through <mask_name>.  Operand 1 is marked commutative ("%").
;;
;; The C block builds the assembler template in a static buffer:
;;  * Integer insn modes (XI/OI/TI) use "p<logic>"; <ssemodesuffix> is
;;    appended for V16SI/V8DI when AVX512F is enabled and for the
;;    256/128-bit SI/DI modes only when AVX512VL is enabled.  Any other
;;    vector mode is unreachable.
;;  * Float insn modes (V8SF/V4SF) use "<logic>ps" and assert that no mask
;;    is applied.
;;  * The outer case labels fall through, so a wider mode also executes the
;;    assertions of the narrower ones.
;;  * Alternative 0 normally uses the two-operand form; when a mask is
;;    applied it uses the "v"-prefixed three-operand form with %0 as both
;;    destination and first source.  Alternative 1 always uses the
;;    three-operand "v" form.
;;
;; The "mode" attribute is computed as: <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL; else <sseinsnmode> with AVX2; else,
;; with AVX, V8SF for modes wider than 16 bytes and <sseinsnmode> otherwise;
;; else V4SF without SSE2 or when optimizing for size; else <sseinsnmode>.
5220cab6 | 11360 | (define_insn "<mask_codefor><code><mode>3<mask_name>" |
e9b578bf | 11361 | [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v") |
11362 | (any_logic:VI48_AVX_AVX512F | |
11363 | (match_operand:VI48_AVX_AVX512F 1 "nonimmediate_operand" "%0,v") | |
11364 | (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))] | |
5220cab6 | 11365 | "TARGET_SSE && <mask_mode512bit_condition> |
b6bc2701 | 11366 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
ba2558f8 | 11367 | { |
03ae25dc | 11368 | static char buf[64]; |
ba2558f8 | 11369 | const char *ops; |
c4530783 | 11370 | const char *tmp; |
11371 | ||
11372 | switch (get_attr_mode (insn)) | |
11373 | { | |
03ae25dc | 11374 | case MODE_XI: |
11375 | gcc_assert (TARGET_AVX512F); | |
c4530783 | 11376 | case MODE_OI: |
0607f34b | 11377 | gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); |
c4530783 | 11378 | case MODE_TI: |
0607f34b | 11379 | gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); |
11380 | switch (<MODE>mode) | |
11381 | { | |
11382 | case V16SImode: | |
11383 | case V8DImode: | |
11384 | if (TARGET_AVX512F) | |
11385 | { | |
11386 | tmp = "p<logic><ssemodesuffix>"; | |
11387 | break; | |
11388 | } | |
11389 | case V8SImode: | |
11390 | case V4DImode: | |
11391 | case V4SImode: | |
11392 | case V2DImode: | |
e9b578bf | 11393 | tmp = TARGET_AVX512VL ? "p<logic><ssemodesuffix>" : "p<logic>"; |
11394 | break; | |
11395 | default: | |
11396 | gcc_unreachable (); | |
11397 | } | |
11398 | break; | |
11399 | ||
11400 | case MODE_V8SF: | |
11401 | gcc_assert (TARGET_AVX); | |
11402 | case MODE_V4SF: | |
11403 | gcc_assert (TARGET_SSE); | |
11404 | gcc_assert (!<mask_applied>); | |
11405 | tmp = "<logic>ps"; | |
11406 | break; | |
11407 | ||
11408 | default: | |
11409 | gcc_unreachable (); | |
11410 | } | |
11411 | ||
11412 | switch (which_alternative) | |
11413 | { | |
11414 | case 0: | |
11415 | if (<mask_applied>) | |
11416 | ops = "v%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}"; | |
11417 | else | |
11418 | ops = "%s\t{%%2, %%0|%%0, %%2}"; | |
11419 | break; | |
11420 | case 1: | |
11421 | ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; | |
11422 | break; | |
11423 | default: | |
11424 | gcc_unreachable (); | |
11425 | } | |
11426 | ||
11427 | snprintf (buf, sizeof (buf), ops, tmp); | |
11428 | return buf; | |
11429 | } | |
11430 | [(set_attr "isa" "noavx,avx") | |
11431 | (set_attr "type" "sselog") | |
11432 | (set (attr "prefix_data16") | |
11433 | (if_then_else | |
11434 | (and (eq_attr "alternative" "0") | |
11435 | (eq_attr "mode" "TI")) | |
11436 | (const_string "1") | |
11437 | (const_string "*"))) | |
11438 | (set_attr "prefix" "<mask_prefix3>") | |
11439 | (set (attr "mode") | |
11440 | (cond [(and (match_test "<MODE_SIZE> == 16") | |
11441 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
11442 | (const_string "<ssePSmode>") | |
11443 | (match_test "TARGET_AVX2") | |
11444 | (const_string "<sseinsnmode>") | |
11445 | (match_test "TARGET_AVX") | |
11446 | (if_then_else | |
11447 | (match_test "<MODE_SIZE> > 16") | |
11448 | (const_string "V8SF") | |
11449 | (const_string "<sseinsnmode>")) | |
11450 | (ior (not (match_test "TARGET_SSE2")) | |
11451 | (match_test "optimize_function_for_size_p (cfun)")) | |
11452 | (const_string "V4SF") | |
11453 | ] | |
11454 | (const_string "<sseinsnmode>")))]) | |
11455 | ||
;; Bitwise logic (any_logic) on the VI12_AVX_AVX512F modes; there is no
;; masked form.  Operand 1 is marked commutative ("%").
;;
;; The C block builds the assembler template in a static buffer from a
;; mnemonic (tmp) and an optional suffix (ssesuffix):
;;  * Integer insn modes (XI/OI/TI) use "p<logic>".  The suffix is "q" for
;;    V64QI/V32HI when AVX512F is enabled; for the 256/128-bit QI/HI modes
;;    it is "q" with AVX512VL and empty otherwise.
;;  * Float insn modes (V8SF/V4SF) use "<logic>ps" with an empty suffix.
;;  * The outer case labels fall through, so a wider mode also executes the
;;    assertions of the narrower ones.
;;  * Alternative 0 prints only the mnemonic (the suffix is not used) in the
;;    two-operand form; alternative 1 prints "v" + mnemonic + suffix in the
;;    three-operand form.
;;
;; The "mode" attribute is computed as: <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL; else <sseinsnmode> with AVX2; else,
;; with AVX, V8SF for modes wider than 16 bytes and <sseinsnmode> otherwise;
;; else V4SF without SSE2 or when optimizing for size; else <sseinsnmode>.
;;
;; NOTE(review): the "prefix" attribute uses <mask_prefix3> although this
;; pattern's name carries no <mask_name>; confirm that this is intended.
11456 | (define_insn "*<code><mode>3" | |
11457 | [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v") | |
11458 | (any_logic: VI12_AVX_AVX512F | |
11459 | (match_operand:VI12_AVX_AVX512F 1 "nonimmediate_operand" "%0,v") | |
11460 | (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))] | |
11461 | "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
11462 | { | |
11463 | static char buf[64]; | |
11464 | const char *ops; | |
11465 | const char *tmp; | |
11466 | const char *ssesuffix; | |
11467 | ||
11468 | switch (get_attr_mode (insn)) | |
11469 | { | |
11470 | case MODE_XI: | |
11471 | gcc_assert (TARGET_AVX512F); | |
11472 | case MODE_OI: | |
11473 | gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); | |
11474 | case MODE_TI: | |
11475 | gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); | |
11476 | switch (<MODE>mode) | |
11477 | { | |
11478 | case V64QImode: | |
11479 | case V32HImode: | |
11480 | if (TARGET_AVX512F) | |
0607f34b | 11481 | { |
e9b578bf | 11482 | tmp = "p<logic>"; |
11483 | ssesuffix = "q"; | |
11484 | break; | |
11485 | } | |
11486 | case V32QImode: | |
11487 | case V16HImode: | |
11488 | case V16QImode: | |
11489 | case V8HImode: | |
11490 | if (TARGET_AVX512VL || TARGET_AVX2 || TARGET_SSE2) | |
11491 | { | |
11492 | tmp = "p<logic>"; | |
11493 | ssesuffix = TARGET_AVX512VL ? "q" : ""; | |
0607f34b | 11494 | break; |
11495 | } | |
11496 | default: | |
e9b578bf | 11497 | gcc_unreachable (); |
0607f34b | 11498 | } |
c4530783 | 11499 | break; |
11500 | ||
11501 | case MODE_V8SF: | |
11502 | gcc_assert (TARGET_AVX); | |
11503 | case MODE_V4SF: | |
11504 | gcc_assert (TARGET_SSE); | |
c4530783 | 11505 | tmp = "<logic>ps"; |
e9b578bf | 11506 | ssesuffix = ""; |
c4530783 | 11507 | break; |
11508 | ||
11509 | default: | |
11510 | gcc_unreachable (); | |
11511 | } | |
3a950715 | 11512 |
ba2558f8 | 11513 | switch (which_alternative) |
11514 | { | |
11515 | case 0: | |
11516 | ops = "%s\t{%%2, %%0|%%0, %%2}"; | |
e9b578bf | 11517 | snprintf (buf, sizeof (buf), ops, tmp); |
ba2558f8 | 11518 | break; |
11519 | case 1: | |
e9b578bf | 11520 | ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; |
11521 | snprintf (buf, sizeof (buf), ops, tmp, ssesuffix); | |
ba2558f8 | 11522 | break; |
11523 | default: | |
11524 | gcc_unreachable (); | |
11525 | } | |
ed30e0a6 | 11526 |
ba2558f8 | 11527 | return buf; |
11528 | } | |
11529 | [(set_attr "isa" "noavx,avx") | |
11530 | (set_attr "type" "sselog") | |
11531 | (set (attr "prefix_data16") | |
11532 | (if_then_else | |
11533 | (and (eq_attr "alternative" "0") | |
11534 | (eq_attr "mode" "TI")) | |
11535 | (const_string "1") | |
11536 | (const_string "*"))) | |
5220cab6 | 11537 | (set_attr "prefix" "<mask_prefix3>") |
ba2558f8 | 11538 | (set (attr "mode") |
7d460314 | 11539 | (cond [(and (match_test "<MODE_SIZE> == 16") |
11540 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
b1756286 | 11541 | (const_string "<ssePSmode>") |
11542 | (match_test "TARGET_AVX2") | |
11543 | (const_string "<sseinsnmode>") | |
11544 | (match_test "TARGET_AVX") | |
11545 | (if_then_else | |
ca94bc0d | 11546 | (match_test "<MODE_SIZE> > 16") |
b1756286 | 11547 | (const_string "V8SF") |
11548 | (const_string "<sseinsnmode>")) | |
11549 | (ior (not (match_test "TARGET_SSE2")) | |
11550 | (match_test "optimize_function_for_size_p (cfun)")) | |
11551 | (const_string "V4SF") | |
11552 | ] | |
11553 | (const_string "<sseinsnmode>")))]) | |
ba2558f8 | 11554 | |
;; vptestm on VI12_AVX512VL vectors: operands 1 (register) and 2 (register
;; or memory) are combined via UNSPEC_TESTM and the result is written to a
;; mask register (operand 0, constraint "Yk").  Requires AVX512BW.
6b76cef2 | 11555 | (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" |
a31e7f46 | 11556 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
d2ff59d6 | 11557 | (unspec:<avx512fmaskmode> |
6b76cef2 | 11558 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") |
11559 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] | |
11560 | UNSPEC_TESTM))] | |
11561 | "TARGET_AVX512BW" | |
11562 | "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
11563 | [(set_attr "prefix" "evex") | |
11564 | (set_attr "mode" "<sseinsnmode>")]) | |
11565 | ||
;; vptestm on VI48_AVX512VL vectors: operands 1 (register) and 2 (register
;; or memory) are combined via UNSPEC_TESTM and the result is written to a
;; mask register (operand 0, constraint "Yk").  Requires AVX512F.
11566 | (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" | |
11567 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
11568 | (unspec:<avx512fmaskmode> | |
11569 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v") | |
11570 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] | |
d2ff59d6 | 11571 | UNSPEC_TESTM))] |
11572 | "TARGET_AVX512F" | |
c3d9b089 | 11573 | "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" |
d2ff59d6 | 11574 | [(set_attr "prefix" "evex") |
11575 | (set_attr "mode" "<sseinsnmode>")]) | |
11576 | ||
;; vptestnm on VI12_AVX512VL vectors: operands 1 (register) and 2 (register
;; or memory) are combined via UNSPEC_TESTNM and the result is written to a
;; mask register (operand 0, constraint "Yk").  Requires AVX512BW.
6b76cef2 | 11577 | (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>" |
11578 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
11579 | (unspec:<avx512fmaskmode> | |
11580 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
11581 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] | |
11582 | UNSPEC_TESTNM))] | |
11583 | "TARGET_AVX512BW" | |
11584 | "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
11585 | [(set_attr "prefix" "evex") | |
11586 | (set_attr "mode" "<sseinsnmode>")]) | |
11587 | ||
;; vptestnm on VI48_AVX512VL vectors: operands 1 (register) and 2 (register
;; or memory) are combined via UNSPEC_TESTNM and the result is written to a
;; mask register (operand 0, constraint "Yk").  Requires AVX512F.
11588 | (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>" | |
a31e7f46 | 11589 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
d2ff59d6 | 11590 | (unspec:<avx512fmaskmode> |
6b76cef2 | 11591 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v") |
11592 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] | |
d2ff59d6 | 11593 | UNSPEC_TESTNM))] |
f46a34a6 | 11594 | "TARGET_AVX512F" |
11595 | "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
d2ff59d6 | 11596 | [(set_attr "prefix" "evex") |
11597 | (set_attr "mode" "<sseinsnmode>")]) | |
11598 | ||
5802c0cb | 11599 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
11600 | ;; | |
11601 | ;; Parallel integral element swizzling | |
11602 | ;; | |
11603 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
11604 | ||
;; Pack two vectors of the VI248_AVX2_8_AVX512F_24_AVX512BW modes into one
;; vector of mode <ssepackmode>.  Both inputs are reinterpreted in
;; <ssepackmode> with gen_lowpart and then handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0 (presumably
;; selecting the even-indexed elements -- confirm against that helper).
;; Requires SSE2.
b6fc7168 | 11605 | (define_expand "vec_pack_trunc_<mode>" |
abd4f58b | 11606 | [(match_operand:<ssepackmode> 0 "register_operand") |
8f83f53e | 11607 | (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand") |
11608 | (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")] | |
c6c91d61 | 11609 | "TARGET_SSE2" |
11610 | { | |
b6fc7168 | 11611 | rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]); |
11612 | rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]); | |
e2b81403 | 11613 | ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); |
c6c91d61 | 11614 | DONE; |
11615 | }) | |
11616 | ||
;; Combine two QImode registers into one HImode value:
;;   operand 0 = (zero_extend (operand 1) << 8) | zero_extend (operand 2).
;; No preparation code; requires AVX512F.
0852690b | 11617 | (define_expand "vec_pack_trunc_qi" |
11618 | [(set (match_operand:HI 0 ("register_operand")) | |
11619 | (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 ("register_operand"))) | |
11620 | (const_int 8)) | |
11621 | (zero_extend:HI (match_operand:QI 2 ("register_operand")))))] | |
11622 | "TARGET_AVX512F") | |
11623 | ||
;; Combine two SWI24 registers into one <DOUBLEMASKMODE> value:
;;   operand 0 = (zero_extend (operand 1) << N) | zero_extend (operand 2),
;; where the shift count N (operand 3, filled in by the preparation code) is
;; the bit size of the input mode.  Requires AVX512BW.
11624 | (define_expand "vec_pack_trunc_<mode>" | |
11625 | [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand")) | |
11626 | (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand"))) | |
11627 | (match_dup 3)) | |
11628 | (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))))] | |
11629 | "TARGET_AVX512BW" | |
11630 | { | |
11631 | operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); | |
11632 | }) | |
11633 | ||
;; packsswb / vpacksswb: each <sseunpackmode> input is narrowed with
;; signed saturation (ss_truncate) to <ssehalfvecmode> and the two halves
;; are concatenated into a VI1_AVX512 result.  Alternative 0 is the
;; two-operand SSE form with operand 1 tied to the destination; alternative
;; 1 is the three-operand "v" form, optionally masked via <mask_name>.
d8d386d2 | 11634 | (define_insn "<sse2_avx2>_packsswb<mask_name>" |
201f262d | 11635 | [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x") |
11636 | (vec_concat:VI1_AVX512 | |
5deb404d | 11637 | (ss_truncate:<ssehalfvecmode> |
d8d386d2 | 11638 | (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) |
5deb404d | 11639 | (ss_truncate:<ssehalfvecmode> |
d8d386d2 | 11640 | (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))] |
11641 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
b11a97b3 | 11642 | "@ |
11643 | packsswb\t{%2, %0|%0, %2} | |
d8d386d2 | 11644 | vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 11645 | [(set_attr "isa" "noavx,avx") |
11646 | (set_attr "type" "sselog") | |
11647 | (set_attr "prefix_data16" "1,*") | |
d8d386d2 | 11648 | (set_attr "prefix" "orig,maybe_evex") |
5deb404d | 11649 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 11650 | |
;; packssdw / vpackssdw: each <sseunpackmode> input is narrowed with
;; signed saturation (ss_truncate) to <ssehalfvecmode> and the two halves
;; are concatenated into a VI2_AVX2 result.  Alternative 0 is the
;; two-operand SSE form with operand 1 tied to the destination; alternative
;; 1 is the three-operand "v" form, optionally masked via <mask_name>.
;;
;; NOTE(review): alternative 1 is tagged with prefix "vex" even though the
;; pattern has a masked variant; the packsswb pattern uses "maybe_evex"
;; here.  Confirm which value is intended.
2d71b728 | 11651 | (define_insn "<sse2_avx2>_packssdw<mask_name>" |
11652 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") | |
5deb404d | 11653 | (vec_concat:VI2_AVX2 |
11654 | (ss_truncate:<ssehalfvecmode> | |
2d71b728 | 11655 | (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) |
5deb404d | 11656 | (ss_truncate:<ssehalfvecmode> |
2d71b728 | 11657 | (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))] |
11658 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
b11a97b3 | 11659 | "@ |
11660 | packssdw\t{%2, %0|%0, %2} | |
2d71b728 | 11661 | vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 11662 | [(set_attr "isa" "noavx,avx") |
11663 | (set_attr "type" "sselog") | |
11664 | (set_attr "prefix_data16" "1,*") | |
11665 | (set_attr "prefix" "orig,vex") | |
5deb404d | 11666 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 11667 | |
;; packuswb / vpackuswb: each <sseunpackmode> input is narrowed with
;; unsigned saturation (us_truncate) to <ssehalfvecmode> and the two halves
;; are concatenated into a VI1_AVX512 result.  Alternative 0 is the
;; two-operand SSE form with operand 1 tied to the destination; alternative
;; 1 is the three-operand "v" form, optionally masked via <mask_name>.
;;
;; NOTE(review): alternative 1 is tagged with prefix "vex" even though the
;; pattern has a masked variant; the packsswb pattern uses "maybe_evex"
;; here.  Confirm which value is intended.
d8d386d2 | 11668 | (define_insn "<sse2_avx2>_packuswb<mask_name>" |
201f262d | 11669 | [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x") |
11670 | (vec_concat:VI1_AVX512 | |
5deb404d | 11671 | (us_truncate:<ssehalfvecmode> |
d8d386d2 | 11672 | (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) |
5deb404d | 11673 | (us_truncate:<ssehalfvecmode> |
d8d386d2 | 11674 | (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))] |
11675 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
b11a97b3 | 11676 | "@ |
11677 | packuswb\t{%2, %0|%0, %2} | |
d8d386d2 | 11678 | vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 11679 | [(set_attr "isa" "noavx,avx") |
11680 | (set_attr "type" "sselog") | |
11681 | (set_attr "prefix_data16" "1,*") | |
11682 | (set_attr "prefix" "orig,vex") | |
5deb404d | 11683 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 11684 | |
;; vpunpckhbw on V64QI.  The result is a vec_select from the 128-element
;; concatenation of operands 1 and 2 (operand 2's bytes are indices
;; 64..127).  Within each group of 16 bytes, bytes 8..15 of operand 1 are
;; interleaved with the same-numbered bytes of operand 2; the low 8 bytes of
;; each group are not used.  Optionally masked via <mask_name>.  Requires
;; AVX512BW.
8c409b91 | 11685 | (define_insn "avx512bw_interleave_highv64qi<mask_name>" |
11686 | [(set (match_operand:V64QI 0 "register_operand" "=v") | |
11687 | (vec_select:V64QI | |
11688 | (vec_concat:V128QI | |
11689 | (match_operand:V64QI 1 "register_operand" "v") | |
11690 | (match_operand:V64QI 2 "nonimmediate_operand" "vm")) | |
11691 | (parallel [(const_int 8) (const_int 72) | |
11692 | (const_int 9) (const_int 73) | |
11693 | (const_int 10) (const_int 74) | |
11694 | (const_int 11) (const_int 75) | |
11695 | (const_int 12) (const_int 76) | |
11696 | (const_int 13) (const_int 77) | |
11697 | (const_int 14) (const_int 78) | |
11698 | (const_int 15) (const_int 79) | |
11699 | (const_int 24) (const_int 88) | |
11700 | (const_int 25) (const_int 89) | |
11701 | (const_int 26) (const_int 90) | |
11702 | (const_int 27) (const_int 91) | |
11703 | (const_int 28) (const_int 92) | |
11704 | (const_int 29) (const_int 93) | |
11705 | (const_int 30) (const_int 94) | |
11706 | (const_int 31) (const_int 95) | |
11707 | (const_int 40) (const_int 104) | |
11708 | (const_int 41) (const_int 105) | |
11709 | (const_int 42) (const_int 106) | |
11710 | (const_int 43) (const_int 107) | |
11711 | (const_int 44) (const_int 108) | |
11712 | (const_int 45) (const_int 109) | |
11713 | (const_int 46) (const_int 110) | |
11714 | (const_int 47) (const_int 111) | |
11715 | (const_int 56) (const_int 120) | |
11716 | (const_int 57) (const_int 121) | |
11717 | (const_int 58) (const_int 122) | |
11718 | (const_int 59) (const_int 123) | |
11719 | (const_int 60) (const_int 124) | |
11720 | (const_int 61) (const_int 125) | |
11721 | (const_int 62) (const_int 126) | |
11722 | (const_int 63) (const_int 127)])))] | |
11723 | "TARGET_AVX512BW" | |
11724 | "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
11725 | [(set_attr "type" "sselog") | |
11726 | (set_attr "prefix" "evex") | |
11727 | (set_attr "mode" "XI")]) | |
11728 | ||
;; vpunpckhbw on V32QI.  The result is a vec_select from the 64-element
;; concatenation of operands 1 and 2 (operand 2's bytes are indices
;; 32..63).  Within each group of 16 bytes, bytes 8..15 of operand 1 are
;; interleaved with the same-numbered bytes of operand 2.  Optionally masked
;; via <mask_name>.  Requires AVX2 and <mask_avx512vl_condition>.
11729 | (define_insn "avx2_interleave_highv32qi<mask_name>" | |
11730 | [(set (match_operand:V32QI 0 "register_operand" "=v") | |
5deb404d | 11731 | (vec_select:V32QI |
11732 | (vec_concat:V64QI | |
8c409b91 | 11733 | (match_operand:V32QI 1 "register_operand" "v") |
11734 | (match_operand:V32QI 2 "nonimmediate_operand" "vm")) | |
5deb404d | 11735 | (parallel [(const_int 8) (const_int 40) |
11736 | (const_int 9) (const_int 41) | |
11737 | (const_int 10) (const_int 42) | |
11738 | (const_int 11) (const_int 43) | |
11739 | (const_int 12) (const_int 44) | |
11740 | (const_int 13) (const_int 45) | |
11741 | (const_int 14) (const_int 46) | |
11742 | (const_int 15) (const_int 47) | |
11743 | (const_int 24) (const_int 56) | |
11744 | (const_int 25) (const_int 57) | |
11745 | (const_int 26) (const_int 58) | |
11746 | (const_int 27) (const_int 59) | |
11747 | (const_int 28) (const_int 60) | |
11748 | (const_int 29) (const_int 61) | |
11749 | (const_int 30) (const_int 62) | |
a9e4de7b | 11750 | (const_int 31) (const_int 63)])))] |
8c409b91 | 11751 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
11752 | "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 11753 | [(set_attr "type" "sselog") |
8c409b91 | 11754 | (set_attr "prefix" "<mask_prefix>") |
5deb404d | 11755 | (set_attr "mode" "OI")]) |
11756 | ||
;; Interleave the high eight bytes of operands 1 and 2 (punpckhbw /
;; vpunpckhbw on 128-bit vectors).  Alternative 0 is the two-operand SSE2
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, which also carries the optional mask operand.
;; The masked variant additionally requires <mask_avx512bw_condition>,
;; as the other masked byte/word interleave patterns in this file do.
8c409b91 | 11757 | (define_insn "vec_interleave_highv16qi<mask_name>" |
11758 | [(set (match_operand:V16QI 0 "register_operand" "=x,v") | |
5802c0cb | 11759 | (vec_select:V16QI |
11760 | (vec_concat:V32QI | |
8c409b91 | 11761 | (match_operand:V16QI 1 "register_operand" "0,v") |
11762 | (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")) | |
5802c0cb | 11763 | (parallel [(const_int 8) (const_int 24) |
11764 | (const_int 9) (const_int 25) | |
11765 | (const_int 10) (const_int 26) | |
11766 | (const_int 11) (const_int 27) | |
009b318f | 11767 | (const_int 12) (const_int 28) |
5802c0cb | 11768 | (const_int 13) (const_int 29) |
11769 | (const_int 14) (const_int 30) | |
11770 | (const_int 15) (const_int 31)])))] | |
8c409b91 | 11771 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
b11a97b3 | 11772 | "@ |
11773 | punpckhbw\t{%2, %0|%0, %2} | |
8c409b91 | 11774 | vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 11775 | [(set_attr "isa" "noavx,avx") |
11776 | (set_attr "type" "sselog") | |
11777 | (set_attr "prefix_data16" "1,*") | |
8c409b91 | 11778 | (set_attr "prefix" "orig,<mask_prefix>") |
ed30e0a6 | 11779 | (set_attr "mode" "TI")]) |
11780 | ||
;; Interleave the low eight bytes of each 128-bit lane of operands 1 and 2
;; (vpunpcklbw on 512-bit vectors).  Selector indices 0..63 refer to
;; operand 1 and 64..127 to operand 2 of the V128QI concatenation.
8c409b91 | 11781 | (define_insn "avx512bw_interleave_lowv64qi<mask_name>" |
11782 | [(set (match_operand:V64QI 0 "register_operand" "=v") | |
11783 | (vec_select:V64QI | |
11784 | (vec_concat:V128QI | |
11785 | (match_operand:V64QI 1 "register_operand" "v") | |
11786 | (match_operand:V64QI 2 "nonimmediate_operand" "vm")) | |
11787 | (parallel [(const_int 0) (const_int 64) | |
11788 | (const_int 1) (const_int 65) | |
11789 | (const_int 2) (const_int 66) | |
11790 | (const_int 3) (const_int 67) | |
11791 | (const_int 4) (const_int 68) | |
11792 | (const_int 5) (const_int 69) | |
11793 | (const_int 6) (const_int 70) | |
11794 | (const_int 7) (const_int 71) | |
11795 | (const_int 16) (const_int 80) | |
11796 | (const_int 17) (const_int 81) | |
11797 | (const_int 18) (const_int 82) | |
11798 | (const_int 19) (const_int 83) | |
11799 | (const_int 20) (const_int 84) | |
11800 | (const_int 21) (const_int 85) | |
11801 | (const_int 22) (const_int 86) | |
11802 | (const_int 23) (const_int 87) | |
11803 | (const_int 32) (const_int 96) | |
11804 | (const_int 33) (const_int 97) | |
11805 | (const_int 34) (const_int 98) | |
11806 | (const_int 35) (const_int 99) | |
11807 | (const_int 36) (const_int 100) | |
11808 | (const_int 37) (const_int 101) | |
11809 | (const_int 38) (const_int 102) | |
11810 | (const_int 39) (const_int 103) | |
11811 | (const_int 48) (const_int 112) | |
11812 | (const_int 49) (const_int 113) | |
11813 | (const_int 50) (const_int 114) | |
11814 | (const_int 51) (const_int 115) | |
11815 | (const_int 52) (const_int 116) | |
11816 | (const_int 53) (const_int 117) | |
11817 | (const_int 54) (const_int 118) | |
11818 | (const_int 55) (const_int 119)])))] | |
11819 | "TARGET_AVX512BW" | |
11820 | "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
11821 | [(set_attr "type" "sselog") | |
11822 | (set_attr "prefix" "evex") | |
11823 | (set_attr "mode" "XI")]) | |
11824 | ||
;; Interleave the low eight bytes of each 128-bit lane of operands 1 and 2
;; (vpunpcklbw on 256-bit vectors).  Selector indices 0..31 refer to
;; operand 1 and 32..63 to operand 2 of the V64QI concatenation.
;; NOTE(review): the "prefix" attribute is "maybe_vex" here while
;; avx2_interleave_highv32qi uses "<mask_prefix>" -- confirm which is intended.
11825 | (define_insn "avx2_interleave_lowv32qi<mask_name>" | |
11826 | [(set (match_operand:V32QI 0 "register_operand" "=v") | |
5deb404d | 11827 | (vec_select:V32QI |
11828 | (vec_concat:V64QI | |
8c409b91 | 11829 | (match_operand:V32QI 1 "register_operand" "v") |
11830 | (match_operand:V32QI 2 "nonimmediate_operand" "vm")) | |
5deb404d | 11831 | (parallel [(const_int 0) (const_int 32) |
11832 | (const_int 1) (const_int 33) | |
11833 | (const_int 2) (const_int 34) | |
11834 | (const_int 3) (const_int 35) | |
11835 | (const_int 4) (const_int 36) | |
11836 | (const_int 5) (const_int 37) | |
11837 | (const_int 6) (const_int 38) | |
11838 | (const_int 7) (const_int 39) | |
5deb404d | 11839 | (const_int 16) (const_int 48) |
11840 | (const_int 17) (const_int 49) | |
11841 | (const_int 18) (const_int 50) | |
11842 | (const_int 19) (const_int 51) | |
11843 | (const_int 20) (const_int 52) | |
11844 | (const_int 21) (const_int 53) | |
11845 | (const_int 22) (const_int 54) | |
11846 | (const_int 23) (const_int 55)])))] | |
8c409b91 | 11847 | "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
11848 | "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 11849 | [(set_attr "type" "sselog") |
8c409b91 | 11850 | (set_attr "prefix" "maybe_vex") |
5deb404d | 11851 | (set_attr "mode" "OI")]) |
11852 | ||
;; Interleave the low eight bytes of operands 1 and 2 (punpcklbw /
;; vpunpcklbw on 128-bit vectors).  Alternative 0 is the two-operand SSE2
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, which also carries the optional mask operand.
8c409b91 | 11853 | (define_insn "vec_interleave_lowv16qi<mask_name>" |
11854 | [(set (match_operand:V16QI 0 "register_operand" "=x,v") | |
5802c0cb | 11855 | (vec_select:V16QI |
11856 | (vec_concat:V32QI | |
8c409b91 | 11857 | (match_operand:V16QI 1 "register_operand" "0,v") |
11858 | (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")) | |
5802c0cb | 11859 | (parallel [(const_int 0) (const_int 16) |
11860 | (const_int 1) (const_int 17) | |
11861 | (const_int 2) (const_int 18) | |
11862 | (const_int 3) (const_int 19) | |
11863 | (const_int 4) (const_int 20) | |
11864 | (const_int 5) (const_int 21) | |
11865 | (const_int 6) (const_int 22) | |
11866 | (const_int 7) (const_int 23)])))] | |
8c409b91 | 11867 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
b11a97b3 | 11868 | "@ |
11869 | punpcklbw\t{%2, %0|%0, %2} | |
8c409b91 | 11870 | vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 11871 | [(set_attr "isa" "noavx,avx") |
11872 | (set_attr "type" "sselog") | |
11873 | (set_attr "prefix_data16" "1,*") | |
11874 | (set_attr "prefix" "orig,vex") | |
ed30e0a6 | 11875 | (set_attr "mode" "TI")]) |
11876 | ||
;; Interleave the high four words of each 128-bit lane of operands 1 and 2
;; (vpunpckhwd on 512-bit vectors).  Selector indices 0..31 refer to
;; operand 1 and 32..63 to operand 2 of the V64HI concatenation.
8c409b91 | 11877 | (define_insn "avx512bw_interleave_highv32hi<mask_name>" |
11878 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
11879 | (vec_select:V32HI | |
11880 | (vec_concat:V64HI | |
11881 | (match_operand:V32HI 1 "register_operand" "v") | |
11882 | (match_operand:V32HI 2 "nonimmediate_operand" "vm")) | |
11883 | (parallel [(const_int 4) (const_int 36) | |
11884 | (const_int 5) (const_int 37) | |
11885 | (const_int 6) (const_int 38) | |
11886 | (const_int 7) (const_int 39) | |
11887 | (const_int 12) (const_int 44) | |
11888 | (const_int 13) (const_int 45) | |
11889 | (const_int 14) (const_int 46) | |
11890 | (const_int 15) (const_int 47) | |
11891 | (const_int 20) (const_int 52) | |
11892 | (const_int 21) (const_int 53) | |
11893 | (const_int 22) (const_int 54) | |
11894 | (const_int 23) (const_int 55) | |
11895 | (const_int 28) (const_int 60) | |
11896 | (const_int 29) (const_int 61) | |
11897 | (const_int 30) (const_int 62) | |
11898 | (const_int 31) (const_int 63)])))] | |
11899 | "TARGET_AVX512BW" | |
11900 | "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
11901 | [(set_attr "type" "sselog") | |
11902 | (set_attr "prefix" "evex") | |
11903 | (set_attr "mode" "XI")]) | |
11904 | ||
;; Interleave the high four words of each 128-bit lane of operands 1 and 2
;; (vpunpckhwd on 256-bit vectors).  Selector indices 0..15 refer to
;; operand 1 and 16..31 to operand 2 of the V32HI concatenation.
11905 | (define_insn "avx2_interleave_highv16hi<mask_name>" | |
11906 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
5deb404d | 11907 | (vec_select:V16HI |
11908 | (vec_concat:V32HI | |
8c409b91 | 11909 | (match_operand:V16HI 1 "register_operand" "v") |
11910 | (match_operand:V16HI 2 "nonimmediate_operand" "vm")) | |
5deb404d | 11911 | (parallel [(const_int 4) (const_int 20) |
11912 | (const_int 5) (const_int 21) | |
11913 | (const_int 6) (const_int 22) | |
11914 | (const_int 7) (const_int 23) | |
11915 | (const_int 12) (const_int 28) | |
11916 | (const_int 13) (const_int 29) | |
11917 | (const_int 14) (const_int 30) | |
11918 | (const_int 15) (const_int 31)])))] | |
8c409b91 | 11919 | "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
11920 | "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 11921 | [(set_attr "type" "sselog") |
8c409b91 | 11922 | (set_attr "prefix" "maybe_evex") |
5deb404d | 11923 | (set_attr "mode" "OI")]) |
11924 | ||
;; Interleave the high four words of operands 1 and 2 (punpckhwd /
;; vpunpckhwd on 128-bit vectors).  Alternative 0 is the two-operand SSE2
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, which also carries the optional mask operand.
8c409b91 | 11925 | (define_insn "vec_interleave_highv8hi<mask_name>" |
11926 | [(set (match_operand:V8HI 0 "register_operand" "=x,v") | |
5802c0cb | 11927 | (vec_select:V8HI |
11928 | (vec_concat:V16HI | |
8c409b91 | 11929 | (match_operand:V8HI 1 "register_operand" "0,v") |
11930 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")) | |
5802c0cb | 11931 | (parallel [(const_int 4) (const_int 12) |
11932 | (const_int 5) (const_int 13) | |
11933 | (const_int 6) (const_int 14) | |
11934 | (const_int 7) (const_int 15)])))] | |
8c409b91 | 11935 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
b11a97b3 | 11936 | "@ |
11937 | punpckhwd\t{%2, %0|%0, %2} | |
8c409b91 | 11938 | vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 11939 | [(set_attr "isa" "noavx,avx") |
11940 | (set_attr "type" "sselog") | |
11941 | (set_attr "prefix_data16" "1,*") | |
8c409b91 | 11942 | (set_attr "prefix" "orig,maybe_vex") |
ed30e0a6 | 11943 | (set_attr "mode" "TI")]) |
11944 | ||
;; Interleave the low four words of each 128-bit lane of operands 1 and 2
;; (vpunpcklwd on 512-bit vectors).  Selector indices 0..31 refer to
;; operand 1 and 32..63 to operand 2 of the V64HI concatenation.
8c409b91 | 11945 | (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>" |
11946 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
11947 | (vec_select:V32HI | |
11948 | (vec_concat:V64HI | |
11949 | (match_operand:V32HI 1 "register_operand" "v") | |
11950 | (match_operand:V32HI 2 "nonimmediate_operand" "vm")) | |
11951 | (parallel [(const_int 0) (const_int 32) | |
11952 | (const_int 1) (const_int 33) | |
11953 | (const_int 2) (const_int 34) | |
11954 | (const_int 3) (const_int 35) | |
11955 | (const_int 8) (const_int 40) | |
11956 | (const_int 9) (const_int 41) | |
11957 | (const_int 10) (const_int 42) | |
11958 | (const_int 11) (const_int 43) | |
11959 | (const_int 16) (const_int 48) | |
11960 | (const_int 17) (const_int 49) | |
11961 | (const_int 18) (const_int 50) | |
11962 | (const_int 19) (const_int 51) | |
11963 | (const_int 24) (const_int 56) | |
11964 | (const_int 25) (const_int 57) | |
11965 | (const_int 26) (const_int 58) | |
11966 | (const_int 27) (const_int 59)])))] | |
11967 | "TARGET_AVX512BW" | |
11968 | "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
11969 | [(set_attr "type" "sselog") | |
11970 | (set_attr "prefix" "evex") | |
11971 | (set_attr "mode" "XI")]) | |
11972 | ||
;; Interleave the low four words of each 128-bit lane of operands 1 and 2
;; (vpunpcklwd on 256-bit vectors).  Selector indices 0..15 refer to
;; operand 1 and 16..31 to operand 2 of the V32HI concatenation.
11973 | (define_insn "avx2_interleave_lowv16hi<mask_name>" | |
11974 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
5deb404d | 11975 | (vec_select:V16HI |
11976 | (vec_concat:V32HI | |
8c409b91 | 11977 | (match_operand:V16HI 1 "register_operand" "v") |
11978 | (match_operand:V16HI 2 "nonimmediate_operand" "vm")) | |
5deb404d | 11979 | (parallel [(const_int 0) (const_int 16) |
11980 | (const_int 1) (const_int 17) | |
11981 | (const_int 2) (const_int 18) | |
11982 | (const_int 3) (const_int 19) | |
11983 | (const_int 8) (const_int 24) | |
11984 | (const_int 9) (const_int 25) | |
11985 | (const_int 10) (const_int 26) | |
11986 | (const_int 11) (const_int 27)])))] | |
8c409b91 | 11987 | "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
11988 | "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 11989 | [(set_attr "type" "sselog") |
8c409b91 | 11990 | (set_attr "prefix" "maybe_evex") |
5deb404d | 11991 | (set_attr "mode" "OI")]) |
11992 | ||
;; Interleave the low four words of operands 1 and 2 (punpcklwd /
;; vpunpcklwd on 128-bit vectors).  Alternative 0 is the two-operand SSE2
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, which also carries the optional mask operand.
8c409b91 | 11993 | (define_insn "vec_interleave_lowv8hi<mask_name>" |
11994 | [(set (match_operand:V8HI 0 "register_operand" "=x,v") | |
5802c0cb | 11995 | (vec_select:V8HI |
11996 | (vec_concat:V16HI | |
8c409b91 | 11997 | (match_operand:V8HI 1 "register_operand" "0,v") |
11998 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")) | |
5802c0cb | 11999 | (parallel [(const_int 0) (const_int 8) |
12000 | (const_int 1) (const_int 9) | |
12001 | (const_int 2) (const_int 10) | |
12002 | (const_int 3) (const_int 11)])))] | |
8c409b91 | 12003 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
b11a97b3 | 12004 | "@ |
12005 | punpcklwd\t{%2, %0|%0, %2} | |
8c409b91 | 12006 | vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 12007 | [(set_attr "isa" "noavx,avx") |
12008 | (set_attr "type" "sselog") | |
12009 | (set_attr "prefix_data16" "1,*") | |
8c409b91 | 12010 | (set_attr "prefix" "orig,maybe_evex") |
ed30e0a6 | 12011 | (set_attr "mode" "TI")]) |
12012 | ||
;; Interleave the high two doublewords of each 128-bit lane of operands 1
;; and 2 (vpunpckhdq on 256-bit vectors).  Selector indices 0..7 refer to
;; operand 1 and 8..15 to operand 2 of the V16SI concatenation.
8c409b91 | 12013 | (define_insn "avx2_interleave_highv8si<mask_name>" |
12014 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
5deb404d | 12015 | (vec_select:V8SI |
12016 | (vec_concat:V16SI | |
8c409b91 | 12017 | (match_operand:V8SI 1 "register_operand" "v") |
12018 | (match_operand:V8SI 2 "nonimmediate_operand" "vm")) | |
5deb404d | 12019 | (parallel [(const_int 2) (const_int 10) |
12020 | (const_int 3) (const_int 11) | |
12021 | (const_int 6) (const_int 14) | |
12022 | (const_int 7) (const_int 15)])))] | |
8c409b91 | 12023 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
12024 | "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 12025 | [(set_attr "type" "sselog") |
8c409b91 | 12026 | (set_attr "prefix" "maybe_evex") |
5deb404d | 12027 | (set_attr "mode" "OI")]) |
12028 | ||
;; Interleave the high two doublewords of each 128-bit lane of operands 1
;; and 2 (vpunpckhdq on 512-bit vectors).  Selector indices 0..15 refer to
;; operand 1 and 16..31 to operand 2 of the V32SI concatenation.
5220cab6 | 12029 | (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>" |
697a43f8 | 12030 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
12031 | (vec_select:V16SI | |
12032 | (vec_concat:V32SI | |
12033 | (match_operand:V16SI 1 "register_operand" "v") | |
12034 | (match_operand:V16SI 2 "nonimmediate_operand" "vm")) | |
12035 | (parallel [(const_int 2) (const_int 18) | |
12036 | (const_int 3) (const_int 19) | |
12037 | (const_int 6) (const_int 22) | |
12038 | (const_int 7) (const_int 23) | |
12039 | (const_int 10) (const_int 26) | |
12040 | (const_int 11) (const_int 27) | |
12041 | (const_int 14) (const_int 30) | |
12042 | (const_int 15) (const_int 31)])))] | |
12043 | "TARGET_AVX512F" | |
5220cab6 | 12044 | "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 12045 | [(set_attr "type" "sselog") |
12046 | (set_attr "prefix" "evex") | |
12047 | (set_attr "mode" "XI")]) | |
12048 | ||
12049 | ||
;; Interleave the high two doublewords of operands 1 and 2 (punpckhdq /
;; vpunpckhdq on 128-bit vectors).  Alternative 0 is the two-operand SSE2
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, which also carries the optional mask operand.
8c409b91 | 12050 | (define_insn "vec_interleave_highv4si<mask_name>" |
12051 | [(set (match_operand:V4SI 0 "register_operand" "=x,v") | |
5802c0cb | 12052 | (vec_select:V4SI |
12053 | (vec_concat:V8SI | |
8c409b91 | 12054 | (match_operand:V4SI 1 "register_operand" "0,v") |
12055 | (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")) | |
5802c0cb | 12056 | (parallel [(const_int 2) (const_int 6) |
12057 | (const_int 3) (const_int 7)])))] | |
8c409b91 | 12058 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
b11a97b3 | 12059 | "@ |
12060 | punpckhdq\t{%2, %0|%0, %2} | |
8c409b91 | 12061 | vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 12062 | [(set_attr "isa" "noavx,avx") |
12063 | (set_attr "type" "sselog") | |
12064 | (set_attr "prefix_data16" "1,*") | |
8c409b91 | 12065 | (set_attr "prefix" "orig,maybe_vex") |
ed30e0a6 | 12066 | (set_attr "mode" "TI")]) |
12067 | ||
;; Interleave the low two doublewords of each 128-bit lane of operands 1
;; and 2 (vpunpckldq on 256-bit vectors).  Selector indices 0..7 refer to
;; operand 1 and 8..15 to operand 2 of the V16SI concatenation.
8c409b91 | 12068 | (define_insn "avx2_interleave_lowv8si<mask_name>" |
12069 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
5deb404d | 12070 | (vec_select:V8SI |
12071 | (vec_concat:V16SI | |
8c409b91 | 12072 | (match_operand:V8SI 1 "register_operand" "v") |
12073 | (match_operand:V8SI 2 "nonimmediate_operand" "vm")) | |
5deb404d | 12074 | (parallel [(const_int 0) (const_int 8) |
12075 | (const_int 1) (const_int 9) | |
12076 | (const_int 4) (const_int 12) | |
12077 | (const_int 5) (const_int 13)])))] | |
8c409b91 | 12078 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
12079 | "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 12080 | [(set_attr "type" "sselog") |
8c409b91 | 12081 | (set_attr "prefix" "maybe_evex") |
5deb404d | 12082 | (set_attr "mode" "OI")]) |
12083 | ||
;; Interleave the low two doublewords of each 128-bit lane of operands 1
;; and 2 (vpunpckldq on 512-bit vectors).  Selector indices 0..15 refer to
;; operand 1 and 16..31 to operand 2 of the V32SI concatenation.
5220cab6 | 12084 | (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>" |
697a43f8 | 12085 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
12086 | (vec_select:V16SI | |
12087 | (vec_concat:V32SI | |
12088 | (match_operand:V16SI 1 "register_operand" "v") | |
12089 | (match_operand:V16SI 2 "nonimmediate_operand" "vm")) | |
12090 | (parallel [(const_int 0) (const_int 16) | |
12091 | (const_int 1) (const_int 17) | |
12092 | (const_int 4) (const_int 20) | |
12093 | (const_int 5) (const_int 21) | |
12094 | (const_int 8) (const_int 24) | |
12095 | (const_int 9) (const_int 25) | |
12096 | (const_int 12) (const_int 28) | |
12097 | (const_int 13) (const_int 29)])))] | |
12098 | "TARGET_AVX512F" | |
5220cab6 | 12099 | "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 12100 | [(set_attr "type" "sselog") |
12101 | (set_attr "prefix" "evex") | |
12102 | (set_attr "mode" "XI")]) | |
12103 | ||
;; Interleave the low two doublewords of operands 1 and 2 (punpckldq /
;; vpunpckldq on 128-bit vectors).  Alternative 0 is the two-operand SSE2
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, which also carries the optional mask operand.
8c409b91 | 12104 | (define_insn "vec_interleave_lowv4si<mask_name>" |
12105 | [(set (match_operand:V4SI 0 "register_operand" "=x,v") | |
5802c0cb | 12106 | (vec_select:V4SI |
12107 | (vec_concat:V8SI | |
8c409b91 | 12108 | (match_operand:V4SI 1 "register_operand" "0,v") |
12109 | (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")) | |
5802c0cb | 12110 | (parallel [(const_int 0) (const_int 4) |
12111 | (const_int 1) (const_int 5)])))] | |
8c409b91 | 12112 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
b11a97b3 | 12113 | "@ |
12114 | punpckldq\t{%2, %0|%0, %2} | |
8c409b91 | 12115 | vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
b11a97b3 | 12116 | [(set_attr "isa" "noavx,avx") |
12117 | (set_attr "type" "sselog") | |
12118 | (set_attr "prefix_data16" "1,*") | |
12119 | (set_attr "prefix" "orig,vex") | |
ed30e0a6 | 12120 | (set_attr "mode" "TI")]) |
12121 | ||
;; Expander for vec_interleave_high on the VI_256 modes.  It emits both the
;; avx2 low and high interleaves of operands 1 and 2 into temporaries t1/t2,
;; combines them with avx2_permv2ti (viewed as V4DImode) using immediate
;; 1 + (3 << 4), and moves the result into operand 0.
;; NOTE(review): immediate 0x31 presumably selects the upper 128-bit half of
;; t1 and the upper half of t2 -- confirm against the VPERM2I128 encoding.
c241ed0e | 12122 | (define_expand "vec_interleave_high<mode>" |
12123 | [(match_operand:VI_256 0 "register_operand" "=x") | |
12124 | (match_operand:VI_256 1 "register_operand" "x") | |
12125 | (match_operand:VI_256 2 "nonimmediate_operand" "xm")] | |
12126 | "TARGET_AVX2" | |
12127 | { | |
12128 | rtx t1 = gen_reg_rtx (<MODE>mode); | |
12129 | rtx t2 = gen_reg_rtx (<MODE>mode); | |
09e640e6 | 12130 | rtx t3 = gen_reg_rtx (V4DImode); |
c241ed0e | 12131 | emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); |
12132 | emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); | |
09e640e6 | 12133 | emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1), |
12134 | gen_lowpart (V4DImode, t2), | |
12135 | GEN_INT (1 + (3 << 4)))); | |
12136 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3)); | |
c241ed0e | 12137 | DONE; |
12138 | }) | |
12139 | ||
;; Expander for vec_interleave_low on the VI_256 modes.  It emits both the
;; avx2 low and high interleaves of operands 1 and 2 into temporaries t1/t2,
;; combines them with avx2_permv2ti (viewed as V4DImode) using immediate
;; 0 + (2 << 4), and moves the result into operand 0.
;; NOTE(review): immediate 0x20 presumably selects the lower 128-bit half of
;; t1 and the lower half of t2 -- confirm against the VPERM2I128 encoding.
12140 | (define_expand "vec_interleave_low<mode>" | |
12141 | [(match_operand:VI_256 0 "register_operand" "=x") | |
12142 | (match_operand:VI_256 1 "register_operand" "x") | |
12143 | (match_operand:VI_256 2 "nonimmediate_operand" "xm")] | |
12144 | "TARGET_AVX2" | |
12145 | { | |
12146 | rtx t1 = gen_reg_rtx (<MODE>mode); | |
12147 | rtx t2 = gen_reg_rtx (<MODE>mode); | |
09e640e6 | 12148 | rtx t3 = gen_reg_rtx (V4DImode); |
c241ed0e | 12149 | emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); |
12150 | emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); | |
09e640e6 | 12151 | emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1), |
12152 | gen_lowpart (V4DImode, t2), | |
12153 | GEN_INT (0 + (2 << 4)))); | |
12154 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3)); | |
c241ed0e | 12155 | DONE; |
12156 | }) | |
12157 | ||
d3d379e7 | 12158 | ;; Modes handled by pinsr patterns. |
;; V8HI is unconditional; V16QI and V4SI are enabled only with
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
12159 | (define_mode_iterator PINSR_MODE | |
12160 | [(V16QI "TARGET_SSE4_1") V8HI | |
12161 | (V4SI "TARGET_SSE4_1") | |
12162 | (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) | |
12163 | ||
;; Pattern-name prefix per mode: "sse2" for V8HI, "sse4_1" for V16QI,
;; V4SI and V2DI.  Used as <sse2p4_1> in the pinsr insn name.
12164 | (define_mode_attr sse2p4_1 | |
12165 | [(V16QI "sse4_1") (V8HI "sse2") | |
12166 | (V4SI "sse4_1") (V2DI "sse4_1")]) | |
12167 | ||
12168 | ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. | |
;; Insert scalar operand 2 into one element of vector operand 1.
;; Operand 3 is a one-hot vec_merge mask; the insn condition requires
;; exact_log2 of it to be a valid element index, and the output code
;; rewrites operand 3 to that index before printing.
;; Alternatives 0/1 are the SSE forms (register / memory source) and 2/3
;; the AVX forms.  For a register source narrower than SImode the %k
;; modifier is used on operand 2; otherwise control falls through to the
;; template without it.
12169 | (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>" | |
12170 | [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x") | |
12171 | (vec_merge:PINSR_MODE | |
12172 | (vec_duplicate:PINSR_MODE | |
12173 | (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m")) | |
12174 | (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x") | |
abd4f58b | 12175 | (match_operand:SI 3 "const_int_operand")))] |
d3d379e7 | 12176 | "TARGET_SSE2 |
12177 | && ((unsigned) exact_log2 (INTVAL (operands[3])) | |
12178 | < GET_MODE_NUNITS (<MODE>mode))" | |
5802c0cb | 12179 | { |
12180 | operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); | |
b11a97b3 | 12181 | |
12182 | switch (which_alternative) | |
12183 | { | |
12184 | case 0: | |
d3d379e7 | 12185 | if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) |
5deb404d | 12186 | return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; |
d3d379e7 | 12187 | /* FALLTHRU */ |
b11a97b3 | 12188 | case 1: |
d3d379e7 | 12189 | return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; |
b11a97b3 | 12190 | case 2: |
d3d379e7 | 12191 | if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) |
5deb404d | 12192 | return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; |
d3d379e7 | 12193 | /* FALLTHRU */ |
b11a97b3 | 12194 | case 3: |
d3d379e7 | 12195 | return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
b11a97b3 | 12196 | default: |
12197 | gcc_unreachable (); | |
12198 | } | |
5802c0cb | 12199 | } |
b11a97b3 | 12200 | [(set_attr "isa" "noavx,noavx,avx,avx") |
12201 | (set_attr "type" "sselog") | |
d3d379e7 | 12202 | (set (attr "prefix_rex") |
12203 | (if_then_else | |
6be3efec | 12204 | (and (not (match_test "TARGET_AVX")) |
d3d379e7 | 12205 | (eq (const_string "<MODE>mode") (const_string "V2DImode"))) |
12206 | (const_string "1") | |
12207 | (const_string "*"))) | |
12208 | (set (attr "prefix_data16") | |
12209 | (if_then_else | |
6be3efec | 12210 | (and (not (match_test "TARGET_AVX")) |
d3d379e7 | 12211 | (eq (const_string "<MODE>mode") (const_string "V8HImode"))) |
12212 | (const_string "1") | |
12213 | (const_string "*"))) | |
12214 | (set (attr "prefix_extra") | |
12215 | (if_then_else | |
6be3efec | 12216 | (and (not (match_test "TARGET_AVX")) |
d3d379e7 | 12217 | (eq (const_string "<MODE>mode") (const_string "V8HImode"))) |
12218 | (const_string "*") | |
12219 | (const_string "1"))) | |
00a0e418 | 12220 | (set_attr "length_immediate" "1") |
b11a97b3 | 12221 | (set_attr "prefix" "orig,orig,vex,vex") |
5802c0cb | 12222 | (set_attr "mode" "TI")]) |
12223 | ||
;; Masked quarter-vector insert expander.  Operand 3 is the position
;; immediate (0..3).  It is converted into the vec_merge selector that the
;; matching _1_mask insn expects: for 4-byte elements a 16-bit mask with one
;; 4-bit group cleared (0xFFFF ^ (0xF000 >> pos*4)), otherwise an 8-bit mask
;; with one 2-bit group cleared (0xFF ^ (0xC0 >> pos*2)).  Operands 4 and 5
;; are passed through unchanged to the _1_mask insn.
4e303d3a | 12224 | (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask" |
12225 | [(match_operand:AVX512_VEC 0 "register_operand") | |
12226 | (match_operand:AVX512_VEC 1 "register_operand") | |
5220cab6 | 12227 | (match_operand:<ssequartermode> 2 "nonimmediate_operand") |
12228 | (match_operand:SI 3 "const_0_to_3_operand") | |
4e303d3a | 12229 | (match_operand:AVX512_VEC 4 "register_operand") |
5220cab6 | 12230 | (match_operand:<avx512fmaskmode> 5 "register_operand")] |
12231 | "TARGET_AVX512F" | |
12232 | { | |
4e303d3a | 12233 | int mask,selector; |
12234 | mask = INTVAL (operands[3]); | |
6e256598 | 12235 | selector = GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? |
4e303d3a | 12236 | 0xFFFF ^ (0xF000 >> mask * 4) |
12237 | : 0xFF ^ (0xC0 >> mask * 2); | |
12238 | emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask | |
12239 | (operands[0], operands[1], operands[2], GEN_INT (selector), | |
12240 | operands[4], operands[5])); | |
5220cab6 | 12241 | DONE; |
5220cab6 | 12242 | }) |
12243 | ||
;; Quarter-vector insert insn.  Operand 3 is a vec_merge selector; the
;; output code maps the eight recognised selector values (four for the
;; 16-bit form, four for the 8-bit form) back to the instruction's position
;; immediate 0..3 and replaces operand 3 with it.  Any other selector value
;; reaches gcc_unreachable.
4e303d3a | 12244 | (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>" |
12245 | [(set (match_operand:AVX512_VEC 0 "register_operand" "=v") | |
12246 | (vec_merge:AVX512_VEC | |
12247 | (match_operand:AVX512_VEC 1 "register_operand" "v") | |
12248 | (vec_duplicate:AVX512_VEC | |
8e9989b0 | 12249 | (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm")) |
12250 | (match_operand:SI 3 "const_int_operand" "n")))] | |
12251 | "TARGET_AVX512F" | |
12252 | { | |
12253 | int mask; | |
4e303d3a | 12254 | int selector = INTVAL (operands[3]); |
12255 | ||
12256 | if (selector == 0xFFF || selector == 0x3F) | |
12257 | mask = 0; | |
12258 | else if ( selector == 0xF0FF || selector == 0xCF) | |
12259 | mask = 1; | |
12260 | else if ( selector == 0xFF0F || selector == 0xF3) | |
12261 | mask = 2; | |
12262 | else if ( selector == 0xFFF0 || selector == 0xFC) | |
12263 | mask = 3; | |
8e9989b0 | 12264 | else |
12265 | gcc_unreachable (); | |
12266 | ||
12267 | operands[3] = GEN_INT (mask); | |
12268 | ||
4e303d3a | 12269 | return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"; |
8e9989b0 | 12270 | } |
12271 | [(set_attr "type" "sselog") | |
12272 | (set_attr "length_immediate" "1") | |
12273 | (set_attr "prefix" "evex") | |
12274 | (set_attr "mode" "<sseinsnmode>")]) | |
12275 | ||
;; Masked half-vector insert expander.  Operand 3 (0 or 1) chooses the
;; half to replace: 0 dispatches to vec_set_lo_<mode>_mask, 1 to
;; vec_set_hi_<mode>_mask.  Operands 4 and 5 are forwarded unchanged.
4e303d3a | 12276 | (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask" |
12277 | [(match_operand:AVX512_VEC_2 0 "register_operand") | |
12278 | (match_operand:AVX512_VEC_2 1 "register_operand") | |
5220cab6 | 12279 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") |
12280 | (match_operand:SI 3 "const_0_to_1_operand") | |
4e303d3a | 12281 | (match_operand:AVX512_VEC_2 4 "register_operand") |
5220cab6 | 12282 | (match_operand:<avx512fmaskmode> 5 "register_operand")] |
12283 | "TARGET_AVX512F" | |
12284 | { | |
12285 | int mask = INTVAL (operands[3]); | |
12286 | if (mask == 0) | |
12287 | emit_insn (gen_vec_set_lo_<mode>_mask | |
12288 | (operands[0], operands[1], operands[2], | |
12289 | operands[4], operands[5])); | |
12290 | else | |
12291 | emit_insn (gen_vec_set_hi_<mode>_mask | |
12292 | (operands[0], operands[1], operands[2], | |
12293 | operands[4], operands[5])); | |
12294 | DONE; | |
12295 | }) | |
12296 | ||
;; Replace the low half of a 16-element vector: the result concatenates
;; operand 2 with elements 8..15 of operand 1 (vinsert{f,i}32x8 with
;; immediate 0).  The immediate is written "$0x0" only in the AT&T half of
;; the template; the Intel half uses a bare "0x0", since the '$' immediate
;; prefix is AT&T-only.
4e303d3a | 12297 | (define_insn "vec_set_lo_<mode><mask_name>" |
12298 | [(set (match_operand:V16FI 0 "register_operand" "=v") | |
12299 | (vec_concat:V16FI | |
12300 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") | |
12301 | (vec_select:<ssehalfvecmode> | |
12302 | (match_operand:V16FI 1 "register_operand" "v") | |
12303 | (parallel [(const_int 8) (const_int 9) | |
12304 | (const_int 10) (const_int 11) | |
12305 | (const_int 12) (const_int 13) | |
12306 | (const_int 14) (const_int 15)]))))] | |
12307 | "TARGET_AVX512DQ" | |
12308 | "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}" | |
12309 | [(set_attr "type" "sselog") | |
12310 | (set_attr "length_immediate" "1") | |
12311 | (set_attr "prefix" "evex") | |
12312 | (set_attr "mode" "<sseinsnmode>")]) | |
12313 | ||
;; 16-element vec_set_hi: the RTL concatenates operand 2 with elements
;; 0..7 of operand 1 and emits vinsert{f,i}32x8 with immediate 1.
;; The immediate is written "$0x1" only in the AT&T half of the template;
;; the Intel half uses a bare "0x1", since the '$' immediate prefix is
;; AT&T-only.
;; NOTE(review): operand 2 is listed first in the vec_concat although the
;; instruction name says "hi" -- confirm the intended operand order.
12314 | (define_insn "vec_set_hi_<mode><mask_name>" | |
12315 | [(set (match_operand:V16FI 0 "register_operand" "=v") | |
12316 | (vec_concat:V16FI | |
12317 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") | |
12318 | (vec_select:<ssehalfvecmode> | |
12319 | (match_operand:V16FI 1 "register_operand" "v") | |
12320 | (parallel [(const_int 0) (const_int 1) | |
12321 | (const_int 2) (const_int 3) | |
12322 | (const_int 4) (const_int 5) | |
12323 | (const_int 6) (const_int 7)]))))] | |
12324 | "TARGET_AVX512DQ" | |
12325 | "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}" | |
12326 | [(set_attr "type" "sselog") | |
12327 | (set_attr "length_immediate" "1") | |
12328 | (set_attr "prefix" "evex") | |
12329 | (set_attr "mode" "<sseinsnmode>")]) | |
12330 | ||
;; Replace the low half of an 8-element vector: the result concatenates
;; operand 2 with elements 4..7 of operand 1 (vinsert{f,i}64x4 with
;; immediate 0).  The immediate is written "$0x0" only in the AT&T half of
;; the template; the Intel half uses a bare "0x0", since the '$' immediate
;; prefix is AT&T-only.
5220cab6 | 12331 | (define_insn "vec_set_lo_<mode><mask_name>" |
8e9989b0 | 12332 | [(set (match_operand:V8FI 0 "register_operand" "=v") |
12333 | (vec_concat:V8FI | |
12334 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") | |
12335 | (vec_select:<ssehalfvecmode> | |
12336 | (match_operand:V8FI 1 "register_operand" "v") | |
12337 | (parallel [(const_int 4) (const_int 5) | |
12338 | (const_int 6) (const_int 7)]))))] | |
12339 | "TARGET_AVX512F" | |
5220cab6 | 12340 | "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}" |
8e9989b0 | 12341 | [(set_attr "type" "sselog") |
12342 | (set_attr "length_immediate" "1") | |
12343 | (set_attr "prefix" "evex") | |
12344 | (set_attr "mode" "XI")]) | |
12345 | ||
;; 8-element vec_set_hi: the RTL concatenates operand 2 with elements
;; 0..3 of operand 1 and emits vinsert{f,i}64x4 with immediate 1.
;; The immediate is written "$0x1" only in the AT&T half of the template;
;; the Intel half uses a bare "0x1", since the '$' immediate prefix is
;; AT&T-only.
;; NOTE(review): operand 2 is listed first in the vec_concat although the
;; instruction name says "hi" -- confirm the intended operand order.
5220cab6 | 12346 | (define_insn "vec_set_hi_<mode><mask_name>" |
8e9989b0 | 12347 | [(set (match_operand:V8FI 0 "register_operand" "=v") |
12348 | (vec_concat:V8FI | |
12349 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") | |
12350 | (vec_select:<ssehalfvecmode> | |
12351 | (match_operand:V8FI 1 "register_operand" "v") | |
12352 | (parallel [(const_int 0) (const_int 1) | |
12353 | (const_int 2) (const_int 3)]))))] | |
12354 | "TARGET_AVX512F" | |
5220cab6 | 12355 | "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}" |
8e9989b0 | 12356 | [(set_attr "type" "sselog") |
12357 | (set_attr "length_immediate" "1") | |
12358 | (set_attr "prefix" "evex") | |
12359 | (set_attr "mode" "XI")]) | |
12360 | ||
;; Masked 256-bit vshuf{f,i}64x2 expander.  The 2-bit immediate in
;; operand 3 is unpacked into four element indices for the _1_mask insn:
;; bit 0 picks elements {0,1} or {2,3} (operand 1 side of the
;; concatenation), bit 1 picks elements {4,5} or {6,7} (operand 2 side).
;; Operands 4 and 5 are forwarded unchanged.
050e0a37 | 12361 | (define_expand "avx512dq_shuf_<shuffletype>64x2_mask" |
12362 | [(match_operand:VI8F_256 0 "register_operand") | |
12363 | (match_operand:VI8F_256 1 "register_operand") | |
12364 | (match_operand:VI8F_256 2 "nonimmediate_operand") | |
12365 | (match_operand:SI 3 "const_0_to_3_operand") | |
12366 | (match_operand:VI8F_256 4 "register_operand") | |
12367 | (match_operand:QI 5 "register_operand")] | |
12368 | "TARGET_AVX512DQ" | |
12369 | { | |
12370 | int mask = INTVAL (operands[3]); | |
12371 | emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask | |
12372 | (operands[0], operands[1], operands[2], | |
12373 | GEN_INT (((mask >> 0) & 1) * 2 + 0), | |
12374 | GEN_INT (((mask >> 0) & 1) * 2 + 1), | |
12375 | GEN_INT (((mask >> 1) & 1) * 2 + 4), | |
12376 | GEN_INT (((mask >> 1) & 1) * 2 + 5), | |
12377 | operands[4], operands[5])); | |
12378 | DONE; | |
12379 | }) | |
12380 | ||
;; 256-bit vshuf{f,i}64x2 insn.  Operands 3..6 are element indices into the
;; concatenation of operands 1 and 2; the insn condition requires each
;; pair (3,4) and (5,6) to be consecutive.  The output code folds the two
;; pair selections back into the 2-bit instruction immediate and stores it
;; in operand 3.
;; NOTE(review): the expander for this insn is gated on TARGET_AVX512DQ
;; while the insn condition only tests TARGET_AVX512VL, and the "mode"
;; attribute is XI for a 256-bit pattern -- confirm both are intended.
12381 | (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>" | |
12382 | [(set (match_operand:VI8F_256 0 "register_operand" "=v") | |
12383 | (vec_select:VI8F_256 | |
12384 | (vec_concat:<ssedoublemode> | |
12385 | (match_operand:VI8F_256 1 "register_operand" "v") | |
12386 | (match_operand:VI8F_256 2 "nonimmediate_operand" "vm")) | |
12387 | (parallel [(match_operand 3 "const_0_to_3_operand") | |
12388 | (match_operand 4 "const_0_to_3_operand") | |
12389 | (match_operand 5 "const_4_to_7_operand") | |
12390 | (match_operand 6 "const_4_to_7_operand")])))] | |
12391 | "TARGET_AVX512VL | |
12392 | && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
12393 | && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))" | |
12394 | { | |
12395 | int mask; | |
12396 | mask = INTVAL (operands[3]) / 2; | |
12397 | mask |= (INTVAL (operands[5]) - 4) / 2 << 1; | |
12398 | operands[3] = GEN_INT (mask); | |
12399 | return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}"; | |
12400 | } | |
12401 | [(set_attr "type" "sselog") | |
12402 | (set_attr "length_immediate" "1") | |
12403 | (set_attr "prefix" "evex") | |
12404 | (set_attr "mode" "XI")]) | |
12405 | ||
;; Masked 512-bit vshuf{f,i}64x2 expander.  The 8-bit immediate in
;; operand 3 holds four 2-bit fields.  Each field is expanded to a pair of
;; consecutive element indices for the _1_mask insn: fields at bits 0 and 2
;; give indices in 0..7 (operand 1 side of the concatenation), fields at
;; bits 4 and 6 give indices in 8..15 (operand 2 side).
;; Operands 4 and 5 are forwarded unchanged.
5220cab6 | 12406 | (define_expand "avx512f_shuf_<shuffletype>64x2_mask" |
12407 | [(match_operand:V8FI 0 "register_operand") | |
12408 | (match_operand:V8FI 1 "register_operand") | |
12409 | (match_operand:V8FI 2 "nonimmediate_operand") | |
12410 | (match_operand:SI 3 "const_0_to_255_operand") | |
12411 | (match_operand:V8FI 4 "register_operand") | |
12412 | (match_operand:QI 5 "register_operand")] | |
12413 | "TARGET_AVX512F" | |
12414 | { | |
12415 | int mask = INTVAL (operands[3]); | |
12416 | emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask | |
12417 | (operands[0], operands[1], operands[2], | |
12418 | GEN_INT (((mask >> 0) & 3) * 2), | |
12419 | GEN_INT (((mask >> 0) & 3) * 2 + 1), | |
12420 | GEN_INT (((mask >> 2) & 3) * 2), | |
12421 | GEN_INT (((mask >> 2) & 3) * 2 + 1), | |
12422 | GEN_INT (((mask >> 4) & 3) * 2 + 8), | |
12423 | GEN_INT (((mask >> 4) & 3) * 2 + 9), | |
12424 | GEN_INT (((mask >> 6) & 3) * 2 + 8), | |
12425 | GEN_INT (((mask >> 6) & 3) * 2 + 9), | |
12426 | operands[4], operands[5])); | |
12427 | DONE; | |
12428 | }) | |
12429 | ||
;; 512-bit vshuf<shuffletype>64x2, written as an eight-element vec_select
;; from the concatenation of operands 1 and 2.  The predicates restrict
;; operands 3-6 to indices 0..7 and operands 7-10 to indices 8..15.
;; The selection is made of four index pairs (3,4) (5,6) (7,8) (9,10).
;; The insn condition requires every pair to be aligned (even index
;; followed by even index + 1): the output code encodes only INDEX / 2 per
;; pair, so an unaligned pair such as (1, 2) must not match, otherwise it
;; would be emitted with the immediate that selects (0, 1).
;; The output code packs the four pair numbers into 2-bit fields of the
;; immediate (operands 3, 5, 7 - 8, 9 - 8 at bit positions 0, 2, 4, 6),
;; stores it in operands[3] and returns the assembler template.
12430 | (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>" | |
8e9989b0 | 12431 | [(set (match_operand:V8FI 0 "register_operand" "=v") |
12432 | (vec_select:V8FI | |
12433 | (vec_concat:<ssedoublemode> | |
12434 | (match_operand:V8FI 1 "register_operand" "v") | |
12435 | (match_operand:V8FI 2 "nonimmediate_operand" "vm")) | |
12436 | (parallel [(match_operand 3 "const_0_to_7_operand") | |
12437 | (match_operand 4 "const_0_to_7_operand") | |
12438 | (match_operand 5 "const_0_to_7_operand") | |
12439 | (match_operand 6 "const_0_to_7_operand") | |
12440 | (match_operand 7 "const_8_to_15_operand") | |
12441 | (match_operand 8 "const_8_to_15_operand") | |
12442 | (match_operand 9 "const_8_to_15_operand") | |
12443 | (match_operand 10 "const_8_to_15_operand")])))] | |
12444 | "TARGET_AVX512F | |
12445 | && ((INTVAL (operands[3]) & 1) == 0 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
12446 | && (INTVAL (operands[5]) & 1) == 0 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1) | |
12447 | && (INTVAL (operands[7]) & 1) == 0 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1) | |
12448 | && (INTVAL (operands[9]) & 1) == 0 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))" | |
12449 | { | |
12450 | int mask; | |
12451 | mask = INTVAL (operands[3]) / 2; | |
12452 | mask |= INTVAL (operands[5]) / 2 << 2; | |
12453 | mask |= (INTVAL (operands[7]) - 8) / 2 << 4; | |
12454 | mask |= (INTVAL (operands[9]) - 8) / 2 << 6; | |
12455 | operands[3] = GEN_INT (mask); | |
12456 | ||
5220cab6 | 12457 | return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; |
8e9989b0 | 12458 | } |
12459 | [(set_attr "type" "sselog") | |
12460 | (set_attr "length_immediate" "1") | |
12461 | (set_attr "prefix" "evex") | |
12462 | (set_attr "mode" "<sseinsnmode>")]) | |
12463 | ||
;; Expander for the masked 256-bit vshuf<shuffletype>32x4.
;; Operand 3 is an immediate in 0..3 (per its predicate) read as two 1-bit
;; fields.  Bit 0 (B0) is expanded into the four consecutive indices
;; 4*B0 + 0..3 and bit 1 (B1) into 4*B1 + 8..11.  The eight indices are
;; passed, with operands 0-2 and operands 4 and 5 unchanged, to
;; gen_avx512vl_shuf_<shuffletype>32x4_1_mask.
;; NOTE(review): operands 4 and 5 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
050e0a37 | 12464 | (define_expand "avx512vl_shuf_<shuffletype>32x4_mask" |
12465 | [(match_operand:VI4F_256 0 "register_operand") | |
12466 | (match_operand:VI4F_256 1 "register_operand") | |
12467 | (match_operand:VI4F_256 2 "nonimmediate_operand") | |
12468 | (match_operand:SI 3 "const_0_to_3_operand") | |
12469 | (match_operand:VI4F_256 4 "register_operand") | |
12470 | (match_operand:QI 5 "register_operand")] | |
12471 | "TARGET_AVX512VL" | |
12472 | { | |
12473 | int mask = INTVAL (operands[3]); | |
12474 | emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask | |
12475 | (operands[0], operands[1], operands[2], | |
12476 | GEN_INT (((mask >> 0) & 1) * 4 + 0), | |
12477 | GEN_INT (((mask >> 0) & 1) * 4 + 1), | |
12478 | GEN_INT (((mask >> 0) & 1) * 4 + 2), | |
12479 | GEN_INT (((mask >> 0) & 1) * 4 + 3), | |
12480 | GEN_INT (((mask >> 1) & 1) * 4 + 8), | |
12481 | GEN_INT (((mask >> 1) & 1) * 4 + 9), | |
12482 | GEN_INT (((mask >> 1) & 1) * 4 + 10), | |
12483 | GEN_INT (((mask >> 1) & 1) * 4 + 11), | |
12484 | operands[4], operands[5])); | |
12485 | DONE; | |
12486 | }) | |
12487 | ||
;; 256-bit vshuf<shuffletype>32x4, written as an eight-element vec_select
;; from the concatenation of operands 1 and 2.  The predicates restrict
;; operands 3-6 to indices 0..7 and operands 7-10 to indices 8..15.
;; The selection is made of two groups of four consecutive indices,
;; starting at operand 3 and operand 7.  The insn condition requires each
;; group to start at a multiple of 4: the output code encodes only
;; START / 4 per group, so an unaligned run such as (1, 2, 3, 4) must not
;; match, otherwise it would be emitted with the immediate for (0, 1, 2, 3).
;; The output code builds the immediate (bit 0 from operand 3, bit 1 from
;; operand 7 - 8), stores it in operands[3] and returns the template.
12488 | (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>" | |
12489 | [(set (match_operand:VI4F_256 0 "register_operand" "=v") | |
12490 | (vec_select:VI4F_256 | |
12491 | (vec_concat:<ssedoublemode> | |
12492 | (match_operand:VI4F_256 1 "register_operand" "v") | |
12493 | (match_operand:VI4F_256 2 "nonimmediate_operand" "vm")) | |
12494 | (parallel [(match_operand 3 "const_0_to_7_operand") | |
12495 | (match_operand 4 "const_0_to_7_operand") | |
12496 | (match_operand 5 "const_0_to_7_operand") | |
12497 | (match_operand 6 "const_0_to_7_operand") | |
12498 | (match_operand 7 "const_8_to_15_operand") | |
12499 | (match_operand 8 "const_8_to_15_operand") | |
12500 | (match_operand 9 "const_8_to_15_operand") | |
12501 | (match_operand 10 "const_8_to_15_operand")])))] | |
12502 | "TARGET_AVX512VL | |
12503 | && ((INTVAL (operands[3]) & 3) == 0 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
12504 | && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2) | |
12505 | && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3) | |
12506 | && (INTVAL (operands[7]) & 3) == 0 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1) | |
12507 | && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2) | |
12508 | && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))" | |
12509 | { | |
12510 | int mask; | |
12511 | mask = INTVAL (operands[3]) / 4; | |
12512 | mask |= (INTVAL (operands[7]) - 8) / 4 << 1; | |
12513 | operands[3] = GEN_INT (mask); | |
12514 | ||
12515 | return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; | |
12516 | } | |
12517 | [(set_attr "type" "sselog") | |
12518 | (set_attr "length_immediate" "1") | |
12519 | (set_attr "prefix" "evex") | |
12520 | (set_attr "mode" "<sseinsnmode>")]) | |
12521 | ||
;; Expander for the masked 512-bit vshuf<shuffletype>32x4.
;; Operand 3 is an immediate (0..255 per its predicate) read as four 2-bit
;; fields.  Each field F is expanded into the four consecutive indices
;; 4*F + 0..3; the two upper fields additionally get an offset of 16.
;; The sixteen indices are passed, with operands 0-2 and operands 4 and 5
;; unchanged, to gen_avx512f_shuf_<shuffletype>32x4_1_mask.
;; NOTE(review): operands 4 and 5 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
5220cab6 | 12522 | (define_expand "avx512f_shuf_<shuffletype>32x4_mask" |
12523 | [(match_operand:V16FI 0 "register_operand") | |
12524 | (match_operand:V16FI 1 "register_operand") | |
12525 | (match_operand:V16FI 2 "nonimmediate_operand") | |
12526 | (match_operand:SI 3 "const_0_to_255_operand") | |
12527 | (match_operand:V16FI 4 "register_operand") | |
12528 | (match_operand:HI 5 "register_operand")] | |
12529 | "TARGET_AVX512F" | |
12530 | { | |
12531 | int mask = INTVAL (operands[3]); | |
12532 | emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask | |
12533 | (operands[0], operands[1], operands[2], | |
12534 | GEN_INT (((mask >> 0) & 3) * 4), | |
12535 | GEN_INT (((mask >> 0) & 3) * 4 + 1), | |
12536 | GEN_INT (((mask >> 0) & 3) * 4 + 2), | |
12537 | GEN_INT (((mask >> 0) & 3) * 4 + 3), | |
12538 | GEN_INT (((mask >> 2) & 3) * 4), | |
12539 | GEN_INT (((mask >> 2) & 3) * 4 + 1), | |
12540 | GEN_INT (((mask >> 2) & 3) * 4 + 2), | |
12541 | GEN_INT (((mask >> 2) & 3) * 4 + 3), | |
12542 | GEN_INT (((mask >> 4) & 3) * 4 + 16), | |
12543 | GEN_INT (((mask >> 4) & 3) * 4 + 17), | |
12544 | GEN_INT (((mask >> 4) & 3) * 4 + 18), | |
12545 | GEN_INT (((mask >> 4) & 3) * 4 + 19), | |
12546 | GEN_INT (((mask >> 6) & 3) * 4 + 16), | |
12547 | GEN_INT (((mask >> 6) & 3) * 4 + 17), | |
12548 | GEN_INT (((mask >> 6) & 3) * 4 + 18), | |
12549 | GEN_INT (((mask >> 6) & 3) * 4 + 19), | |
12550 | operands[4], operands[5])); | |
12551 | DONE; | |
12552 | }) | |
12553 | ||
;; 512-bit vshuf<shuffletype>32x4, written as a sixteen-element vec_select
;; from the concatenation of operands 1 and 2.  The predicates restrict
;; operands 3-10 to indices 0..15 and operands 11-18 to indices 16..31.
;; The selection is made of four groups of four consecutive indices,
;; starting at operands 3, 7, 11 and 15.  The insn condition requires each
;; group to start at a multiple of 4: the output code encodes only
;; START / 4 per group, so an unaligned run such as (1, 2, 3, 4) must not
;; match, otherwise it would be emitted with the immediate for (0, 1, 2, 3).
;; The output code packs the four group numbers into 2-bit fields of the
;; immediate (operands 3, 7, 11 - 16, 15 - 16 at bit positions 0, 2, 4, 6),
;; stores it in operands[3] and returns the assembler template.
12554 | (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>" | |
8e9989b0 | 12555 | [(set (match_operand:V16FI 0 "register_operand" "=v") |
12556 | (vec_select:V16FI | |
12557 | (vec_concat:<ssedoublemode> | |
12558 | (match_operand:V16FI 1 "register_operand" "v") | |
12559 | (match_operand:V16FI 2 "nonimmediate_operand" "vm")) | |
12560 | (parallel [(match_operand 3 "const_0_to_15_operand") | |
12561 | (match_operand 4 "const_0_to_15_operand") | |
12562 | (match_operand 5 "const_0_to_15_operand") | |
12563 | (match_operand 6 "const_0_to_15_operand") | |
12564 | (match_operand 7 "const_0_to_15_operand") | |
12565 | (match_operand 8 "const_0_to_15_operand") | |
12566 | (match_operand 9 "const_0_to_15_operand") | |
12567 | (match_operand 10 "const_0_to_15_operand") | |
12568 | (match_operand 11 "const_16_to_31_operand") | |
12569 | (match_operand 12 "const_16_to_31_operand") | |
12570 | (match_operand 13 "const_16_to_31_operand") | |
12571 | (match_operand 14 "const_16_to_31_operand") | |
12572 | (match_operand 15 "const_16_to_31_operand") | |
12573 | (match_operand 16 "const_16_to_31_operand") | |
12574 | (match_operand 17 "const_16_to_31_operand") | |
12575 | (match_operand 18 "const_16_to_31_operand")])))] | |
12576 | "TARGET_AVX512F | |
12577 | && ((INTVAL (operands[3]) & 3) == 0 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
12578 | && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2) | |
12579 | && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3) | |
12580 | && (INTVAL (operands[7]) & 3) == 0 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1) | |
12581 | && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2) | |
12582 | && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3) | |
12583 | && (INTVAL (operands[11]) & 3) == 0 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1) | |
12584 | && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2) | |
12585 | && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3) | |
12586 | && (INTVAL (operands[15]) & 3) == 0 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1) | |
12587 | && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2) | |
12588 | && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))" | |
12589 | { | |
12590 | int mask; | |
12591 | mask = INTVAL (operands[3]) / 4; | |
12592 | mask |= INTVAL (operands[7]) / 4 << 2; | |
12593 | mask |= (INTVAL (operands[11]) - 16) / 4 << 4; | |
12594 | mask |= (INTVAL (operands[15]) - 16) / 4 << 6; | |
12595 | operands[3] = GEN_INT (mask); | |
12596 | ||
5220cab6 | 12597 | return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}"; |
8e9989b0 | 12598 | } |
12599 | [(set_attr "type" "sselog") | |
12600 | (set_attr "length_immediate" "1") | |
12601 | (set_attr "prefix" "evex") | |
12602 | (set_attr "mode" "<sseinsnmode>")]) | |
12603 | ||
;; Expander for the masked 512-bit vpshufd on V16SI.
;; Operand 2 is an immediate (0..255 per its predicate) read as four 2-bit
;; fields.  The four field values are emitted four times, with offsets
;; 0, 4, 8 and 12, giving the sixteen element indices passed to
;; gen_avx512f_pshufd_1_mask together with operands 0, 1, 3 and 4.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
5220cab6 | 12604 | (define_expand "avx512f_pshufdv3_mask" |
12605 | [(match_operand:V16SI 0 "register_operand") | |
12606 | (match_operand:V16SI 1 "nonimmediate_operand") | |
12607 | (match_operand:SI 2 "const_0_to_255_operand") | |
12608 | (match_operand:V16SI 3 "register_operand") | |
12609 | (match_operand:HI 4 "register_operand")] | |
12610 | "TARGET_AVX512F" | |
12611 | { | |
12612 | int mask = INTVAL (operands[2]); | |
12613 | emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1], | |
12614 | GEN_INT ((mask >> 0) & 3), | |
12615 | GEN_INT ((mask >> 2) & 3), | |
12616 | GEN_INT ((mask >> 4) & 3), | |
12617 | GEN_INT ((mask >> 6) & 3), | |
12618 | GEN_INT (((mask >> 0) & 3) + 4), | |
12619 | GEN_INT (((mask >> 2) & 3) + 4), | |
12620 | GEN_INT (((mask >> 4) & 3) + 4), | |
12621 | GEN_INT (((mask >> 6) & 3) + 4), | |
12622 | GEN_INT (((mask >> 0) & 3) + 8), | |
12623 | GEN_INT (((mask >> 2) & 3) + 8), | |
12624 | GEN_INT (((mask >> 4) & 3) + 8), | |
12625 | GEN_INT (((mask >> 6) & 3) + 8), | |
12626 | GEN_INT (((mask >> 0) & 3) + 12), | |
12627 | GEN_INT (((mask >> 2) & 3) + 12), | |
12628 | GEN_INT (((mask >> 4) & 3) + 12), | |
12629 | GEN_INT (((mask >> 6) & 3) + 12), | |
12630 | operands[3], operands[4])); | |
12631 | DONE; | |
12632 | }) | |
12633 | ||
;; 512-bit vpshufd, written as a sixteen-element vec_select from the V16SI
;; operand 1.  The predicates restrict operands 2-5 to 0..3, 6-9 to 4..7,
;; 10-13 to 8..11 and 14-17 to 12..15.
;; The insn condition requires the second, third and fourth groups of four
;; indices to equal the first group (operands 2-5) plus 4, 8 and 12.
;; The output code packs operands 2-5 into 2-bit fields of the immediate
;; (bit positions 0, 2, 4, 6), stores it in operands[2] and returns the
;; assembler template.
12634 | (define_insn "avx512f_pshufd_1<mask_name>" | |
8e9989b0 | 12635 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
12636 | (vec_select:V16SI | |
12637 | (match_operand:V16SI 1 "nonimmediate_operand" "vm") | |
12638 | (parallel [(match_operand 2 "const_0_to_3_operand") | |
12639 | (match_operand 3 "const_0_to_3_operand") | |
12640 | (match_operand 4 "const_0_to_3_operand") | |
12641 | (match_operand 5 "const_0_to_3_operand") | |
12642 | (match_operand 6 "const_4_to_7_operand") | |
12643 | (match_operand 7 "const_4_to_7_operand") | |
12644 | (match_operand 8 "const_4_to_7_operand") | |
12645 | (match_operand 9 "const_4_to_7_operand") | |
12646 | (match_operand 10 "const_8_to_11_operand") | |
12647 | (match_operand 11 "const_8_to_11_operand") | |
12648 | (match_operand 12 "const_8_to_11_operand") | |
12649 | (match_operand 13 "const_8_to_11_operand") | |
12650 | (match_operand 14 "const_12_to_15_operand") | |
12651 | (match_operand 15 "const_12_to_15_operand") | |
12652 | (match_operand 16 "const_12_to_15_operand") | |
12653 | (match_operand 17 "const_12_to_15_operand")])))] | |
12654 | "TARGET_AVX512F | |
12655 | && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) | |
12656 | && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) | |
12657 | && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) | |
12658 | && INTVAL (operands[5]) + 4 == INTVAL (operands[9]) | |
12659 | && INTVAL (operands[2]) + 8 == INTVAL (operands[10]) | |
12660 | && INTVAL (operands[3]) + 8 == INTVAL (operands[11]) | |
12661 | && INTVAL (operands[4]) + 8 == INTVAL (operands[12]) | |
12662 | && INTVAL (operands[5]) + 8 == INTVAL (operands[13]) | |
12663 | && INTVAL (operands[2]) + 12 == INTVAL (operands[14]) | |
12664 | && INTVAL (operands[3]) + 12 == INTVAL (operands[15]) | |
12665 | && INTVAL (operands[4]) + 12 == INTVAL (operands[16]) | |
12666 | && INTVAL (operands[5]) + 12 == INTVAL (operands[17])" | |
12667 | { | |
12668 | int mask = 0; | |
12669 | mask |= INTVAL (operands[2]) << 0; | |
12670 | mask |= INTVAL (operands[3]) << 2; | |
12671 | mask |= INTVAL (operands[4]) << 4; | |
12672 | mask |= INTVAL (operands[5]) << 6; | |
12673 | operands[2] = GEN_INT (mask); | |
12674 | ||
5220cab6 | 12675 | return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}"; |
8e9989b0 | 12676 | } |
12677 | [(set_attr "type" "sselog1") | |
12678 | (set_attr "prefix" "evex") | |
12679 | (set_attr "length_immediate" "1") | |
12680 | (set_attr "mode" "XI")]) | |
12681 | ||
;; Expander for the masked 256-bit vpshufd on V8SI.
;; Operand 2 is an immediate (0..255 per its predicate) read as four 2-bit
;; fields.  The four field values are emitted twice, with offsets 0 and 4,
;; giving the eight element indices passed to gen_avx2_pshufd_1_mask
;; together with operands 0, 1, 3 and 4.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
050e0a37 | 12682 | (define_expand "avx512vl_pshufdv3_mask" |
12683 | [(match_operand:V8SI 0 "register_operand") | |
12684 | (match_operand:V8SI 1 "nonimmediate_operand") | |
12685 | (match_operand:SI 2 "const_0_to_255_operand") | |
12686 | (match_operand:V8SI 3 "register_operand") | |
12687 | (match_operand:QI 4 "register_operand")] | |
12688 | "TARGET_AVX512VL" | |
12689 | { | |
12690 | int mask = INTVAL (operands[2]); | |
12691 | emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1], | |
12692 | GEN_INT ((mask >> 0) & 3), | |
12693 | GEN_INT ((mask >> 2) & 3), | |
12694 | GEN_INT ((mask >> 4) & 3), | |
12695 | GEN_INT ((mask >> 6) & 3), | |
12696 | GEN_INT (((mask >> 0) & 3) + 4), | |
12697 | GEN_INT (((mask >> 2) & 3) + 4), | |
12698 | GEN_INT (((mask >> 4) & 3) + 4), | |
12699 | GEN_INT (((mask >> 6) & 3) + 4), | |
12700 | operands[3], operands[4])); | |
12701 | DONE; | |
12702 | }) | |
12703 | ||
;; Expander for the unmasked 256-bit vpshufd on V8SI.
;; Operand 2 is an immediate (0..255 per its predicate) read as four 2-bit
;; fields.  The four field values are emitted twice, with offsets 0 and 4,
;; giving the eight element indices passed to gen_avx2_pshufd_1.
5deb404d | 12704 | (define_expand "avx2_pshufdv3" |
abd4f58b | 12705 | [(match_operand:V8SI 0 "register_operand") |
12706 | (match_operand:V8SI 1 "nonimmediate_operand") | |
12707 | (match_operand:SI 2 "const_0_to_255_operand")] | |
5deb404d | 12708 | "TARGET_AVX2" |
12709 | { | |
12710 | int mask = INTVAL (operands[2]); | |
12711 | emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1], | |
12712 | GEN_INT ((mask >> 0) & 3), | |
12713 | GEN_INT ((mask >> 2) & 3), | |
12714 | GEN_INT ((mask >> 4) & 3), | |
a9e4de7b | 12715 | GEN_INT ((mask >> 6) & 3), |
12716 | GEN_INT (((mask >> 0) & 3) + 4), | |
12717 | GEN_INT (((mask >> 2) & 3) + 4), | |
12718 | GEN_INT (((mask >> 4) & 3) + 4), | |
12719 | GEN_INT (((mask >> 6) & 3) + 4))); | |
5deb404d | 12720 | DONE; |
12721 | }) | |
12722 | ||
;; 256-bit vpshufd, written as an eight-element vec_select from the V8SI
;; operand 1.  The predicates restrict operands 2-5 to 0..3 and operands
;; 6-9 to 4..7.
;; The insn condition requires TARGET_AVX2, the <mask_avx512vl_condition>
;; substitution (defined outside this section) and that operands 6-9 equal
;; operands 2-5 plus 4.
;; The output code packs operands 2-5 into 2-bit fields of the immediate
;; (bit positions 0, 2, 4, 6), stores it in operands[2] and returns the
;; assembler template.
050e0a37 | 12723 | (define_insn "avx2_pshufd_1<mask_name>" |
12724 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
5deb404d | 12725 | (vec_select:V8SI |
050e0a37 | 12726 | (match_operand:V8SI 1 "nonimmediate_operand" "vm") |
abd4f58b | 12727 | (parallel [(match_operand 2 "const_0_to_3_operand") |
12728 | (match_operand 3 "const_0_to_3_operand") | |
12729 | (match_operand 4 "const_0_to_3_operand") | |
12730 | (match_operand 5 "const_0_to_3_operand") | |
12731 | (match_operand 6 "const_4_to_7_operand") | |
12732 | (match_operand 7 "const_4_to_7_operand") | |
12733 | (match_operand 8 "const_4_to_7_operand") | |
12734 | (match_operand 9 "const_4_to_7_operand")])))] | |
a9e4de7b | 12735 | "TARGET_AVX2 |
050e0a37 | 12736 | && <mask_avx512vl_condition> |
a9e4de7b | 12737 | && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) |
12738 | && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) | |
12739 | && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) | |
12740 | && INTVAL (operands[5]) + 4 == INTVAL (operands[9])" | |
5deb404d | 12741 | { |
12742 | int mask = 0; | |
12743 | mask |= INTVAL (operands[2]) << 0; | |
12744 | mask |= INTVAL (operands[3]) << 2; | |
12745 | mask |= INTVAL (operands[4]) << 4; | |
12746 | mask |= INTVAL (operands[5]) << 6; | |
12747 | operands[2] = GEN_INT (mask); | |
12748 | ||
050e0a37 | 12749 | return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; |
5deb404d | 12750 | } |
12751 | [(set_attr "type" "sselog1") | |
050e0a37 | 12752 | (set_attr "prefix" "maybe_evex") |
5deb404d | 12753 | (set_attr "length_immediate" "1") |
12754 | (set_attr "mode" "OI")]) | |
12755 | ||
;; Expander for the masked 128-bit vpshufd on V4SI.
;; Operand 2 is an immediate (0..255 per its predicate) split into four
;; 2-bit fields, which are passed as the four element indices to
;; gen_sse2_pshufd_1_mask together with operands 0, 1, 3 and 4.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
050e0a37 | 12756 | (define_expand "avx512vl_pshufd_mask" |
12757 | [(match_operand:V4SI 0 "register_operand") | |
12758 | (match_operand:V4SI 1 "nonimmediate_operand") | |
12759 | (match_operand:SI 2 "const_0_to_255_operand") | |
12760 | (match_operand:V4SI 3 "register_operand") | |
12761 | (match_operand:QI 4 "register_operand")] | |
12762 | "TARGET_AVX512VL" | |
12763 | { | |
12764 | int mask = INTVAL (operands[2]); | |
12765 | emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1], | |
12766 | GEN_INT ((mask >> 0) & 3), | |
12767 | GEN_INT ((mask >> 2) & 3), | |
12768 | GEN_INT ((mask >> 4) & 3), | |
12769 | GEN_INT ((mask >> 6) & 3), | |
12770 | operands[3], operands[4])); | |
12771 | DONE; | |
12772 | }) | |
12773 | ||
;; Expander for the 128-bit pshufd on V4SI.
;; Operand 2 is any const_int; only its low eight bits are used, as four
;; 2-bit fields (shifts 0, 2, 4, 6, each masked with 3), which are passed
;; as the four element indices to gen_sse2_pshufd_1.
5802c0cb | 12774 | (define_expand "sse2_pshufd" |
abd4f58b | 12775 | [(match_operand:V4SI 0 "register_operand") |
12776 | (match_operand:V4SI 1 "nonimmediate_operand") | |
12777 | (match_operand:SI 2 "const_int_operand")] | |
5802c0cb | 12778 | "TARGET_SSE2" |
12779 | { | |
12780 | int mask = INTVAL (operands[2]); | |
12781 | emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1], | |
12782 | GEN_INT ((mask >> 0) & 3), | |
12783 | GEN_INT ((mask >> 2) & 3), | |
12784 | GEN_INT ((mask >> 4) & 3), | |
12785 | GEN_INT ((mask >> 6) & 3))); | |
12786 | DONE; | |
12787 | }) | |
12788 | ||
;; 128-bit pshufd, written as a four-element vec_select from the V4SI
;; operand 1, with operands 2-5 restricted to 0..3 by their predicates.
;; The insn condition is TARGET_SSE2 plus the <mask_avx512vl_condition>
;; substitution (defined outside this section).
;; The output code packs operands 2-5 into 2-bit fields of the immediate
;; (bit positions 0, 2, 4, 6), stores it in operands[2] and returns the
;; assembler template.
050e0a37 | 12789 | (define_insn "sse2_pshufd_1<mask_name>" |
12790 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
5802c0cb | 12791 | (vec_select:V4SI |
050e0a37 | 12792 | (match_operand:V4SI 1 "nonimmediate_operand" "vm") |
abd4f58b | 12793 | (parallel [(match_operand 2 "const_0_to_3_operand") |
12794 | (match_operand 3 "const_0_to_3_operand") | |
12795 | (match_operand 4 "const_0_to_3_operand") | |
12796 | (match_operand 5 "const_0_to_3_operand")])))] | |
050e0a37 | 12797 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
5802c0cb | 12798 | { |
12799 | int mask = 0; | |
12800 | mask |= INTVAL (operands[2]) << 0; | |
12801 | mask |= INTVAL (operands[3]) << 2; | |
12802 | mask |= INTVAL (operands[4]) << 4; | |
12803 | mask |= INTVAL (operands[5]) << 6; | |
12804 | operands[2] = GEN_INT (mask); | |
12805 | ||
050e0a37 | 12806 | return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
5802c0cb | 12807 | } |
12808 | [(set_attr "type" "sselog1") | |
1f346cbc | 12809 | (set_attr "prefix_data16" "1") |
050e0a37 | 12810 | (set_attr "prefix" "<mask_prefix2>") |
00a0e418 | 12811 | (set_attr "length_immediate" "1") |
5802c0cb | 12812 | (set_attr "mode" "TI")]) |
12813 | ||
;; 512-bit vpshuflw on V32HI, modelled as an unspec (UNSPEC_PSHUFLW) of the
;; source vector (operand 1) and an immediate (operand 2, 0..255 per its
;; predicate).  Enabled for TARGET_AVX512BW.
;; length_immediate is set to 1, as in the other vpshuflw patterns of this
;; section, because the template emits operand 2 as an immediate.
7b988cc3 | 12814 | (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>" |
12815 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
12816 | (unspec:V32HI | |
12817 | [(match_operand:V32HI 1 "nonimmediate_operand" "vm") | |
12818 | (match_operand:SI 2 "const_0_to_255_operand" "n")] | |
12819 | UNSPEC_PSHUFLW))] | |
12820 | "TARGET_AVX512BW" | |
12821 | "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
12822 | [(set_attr "type" "sselog") | |
12823 | (set_attr "prefix" "evex") (set_attr "length_immediate" "1") | |
12824 | (set_attr "mode" "XI")]) | |
12825 | ||
;; Expander for the masked 256-bit vpshuflw on V16HI.
;; Operand 2 is an immediate (0..255 per its predicate) read as four 2-bit
;; fields.  The four field values are emitted twice, with offsets 0 and 8,
;; giving the eight variable element indices passed to
;; gen_avx2_pshuflw_1_mask together with operands 0, 1, 3 and 4.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
12826 | (define_expand "avx512vl_pshuflwv3_mask" | |
12827 | [(match_operand:V16HI 0 "register_operand") | |
12828 | (match_operand:V16HI 1 "nonimmediate_operand") | |
12829 | (match_operand:SI 2 "const_0_to_255_operand") | |
12830 | (match_operand:V16HI 3 "register_operand") | |
12831 | (match_operand:HI 4 "register_operand")] | |
12832 | "TARGET_AVX512VL && TARGET_AVX512BW" | |
12833 | { | |
12834 | int mask = INTVAL (operands[2]); | |
12835 | emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1], | |
12836 | GEN_INT ((mask >> 0) & 3), | |
12837 | GEN_INT ((mask >> 2) & 3), | |
12838 | GEN_INT ((mask >> 4) & 3), | |
12839 | GEN_INT ((mask >> 6) & 3), | |
12840 | GEN_INT (((mask >> 0) & 3) + 8), | |
12841 | GEN_INT (((mask >> 2) & 3) + 8), | |
12842 | GEN_INT (((mask >> 4) & 3) + 8), | |
12843 | GEN_INT (((mask >> 6) & 3) + 8), | |
12844 | operands[3], operands[4])); | |
12845 | DONE; | |
12846 | }) | |
12847 | ||
;; Expander for the unmasked 256-bit vpshuflw on V16HI.
;; Operand 2 is an immediate (0..255 per its predicate) read as four 2-bit
;; fields.  The four field values are emitted twice, with offsets 0 and 8,
;; giving the eight variable element indices passed to gen_avx2_pshuflw_1.
5deb404d | 12848 | (define_expand "avx2_pshuflwv3" |
abd4f58b | 12849 | [(match_operand:V16HI 0 "register_operand") |
12850 | (match_operand:V16HI 1 "nonimmediate_operand") | |
12851 | (match_operand:SI 2 "const_0_to_255_operand")] | |
5deb404d | 12852 | "TARGET_AVX2" |
12853 | { | |
12854 | int mask = INTVAL (operands[2]); | |
12855 | emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1], | |
12856 | GEN_INT ((mask >> 0) & 3), | |
12857 | GEN_INT ((mask >> 2) & 3), | |
12858 | GEN_INT ((mask >> 4) & 3), | |
a9e4de7b | 12859 | GEN_INT ((mask >> 6) & 3), |
12860 | GEN_INT (((mask >> 0) & 3) + 8), | |
12861 | GEN_INT (((mask >> 2) & 3) + 8), | |
12862 | GEN_INT (((mask >> 4) & 3) + 8), | |
12863 | GEN_INT (((mask >> 6) & 3) + 8))); | |
5deb404d | 12864 | DONE; |
12865 | }) | |
12866 | ||
;; 256-bit vpshuflw, written as a sixteen-element vec_select from the V16HI
;; operand 1.  Result positions 4-7 and 12-15 use the fixed indices 4..7
;; and 12..15; positions 0-3 use operands 2-5 (0..3 per their predicates)
;; and positions 8-11 use operands 6-9 (8..11 per their predicates).
;; The insn condition requires TARGET_AVX2, the <mask_avx512bw_condition>
;; and <mask_avx512vl_condition> substitutions (defined outside this
;; section) and that operands 6-9 equal operands 2-5 plus 8.
;; The output code packs operands 2-5 into 2-bit fields of the immediate
;; (bit positions 0, 2, 4, 6), stores it in operands[2] and returns the
;; assembler template.
7b988cc3 | 12867 | (define_insn "avx2_pshuflw_1<mask_name>" |
12868 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
5deb404d | 12869 | (vec_select:V16HI |
7b988cc3 | 12870 | (match_operand:V16HI 1 "nonimmediate_operand" "vm") |
abd4f58b | 12871 | (parallel [(match_operand 2 "const_0_to_3_operand") |
12872 | (match_operand 3 "const_0_to_3_operand") | |
12873 | (match_operand 4 "const_0_to_3_operand") | |
12874 | (match_operand 5 "const_0_to_3_operand") | |
5deb404d | 12875 | (const_int 4) |
12876 | (const_int 5) | |
12877 | (const_int 6) | |
12878 | (const_int 7) | |
abd4f58b | 12879 | (match_operand 6 "const_8_to_11_operand") |
12880 | (match_operand 7 "const_8_to_11_operand") | |
12881 | (match_operand 8 "const_8_to_11_operand") | |
12882 | (match_operand 9 "const_8_to_11_operand") | |
5deb404d | 12883 | (const_int 12) |
12884 | (const_int 13) | |
12885 | (const_int 14) | |
12886 | (const_int 15)])))] | |
a9e4de7b | 12887 | "TARGET_AVX2 |
7b988cc3 | 12888 | && <mask_avx512bw_condition> && <mask_avx512vl_condition> |
a9e4de7b | 12889 | && INTVAL (operands[2]) + 8 == INTVAL (operands[6]) |
12890 | && INTVAL (operands[3]) + 8 == INTVAL (operands[7]) | |
12891 | && INTVAL (operands[4]) + 8 == INTVAL (operands[8]) | |
12892 | && INTVAL (operands[5]) + 8 == INTVAL (operands[9])" | |
5deb404d | 12893 | { |
12894 | int mask = 0; | |
12895 | mask |= INTVAL (operands[2]) << 0; | |
12896 | mask |= INTVAL (operands[3]) << 2; | |
12897 | mask |= INTVAL (operands[4]) << 4; | |
12898 | mask |= INTVAL (operands[5]) << 6; | |
12899 | operands[2] = GEN_INT (mask); | |
12900 | ||
7b988cc3 | 12901 | return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; |
5deb404d | 12902 | } |
12903 | [(set_attr "type" "sselog") | |
7b988cc3 | 12904 | (set_attr "prefix" "maybe_evex") |
5deb404d | 12905 | (set_attr "length_immediate" "1") |
12906 | (set_attr "mode" "OI")]) | |
12907 | ||
;; Expander for the masked 128-bit vpshuflw on V8HI.
;; Operand 2 is an immediate (0..255 per its predicate) split into four
;; 2-bit fields, which are passed as the four variable element indices to
;; gen_sse2_pshuflw_1_mask together with operands 0, 1, 3 and 4.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
7b988cc3 | 12908 | (define_expand "avx512vl_pshuflw_mask" |
12909 | [(match_operand:V8HI 0 "register_operand") | |
12910 | (match_operand:V8HI 1 "nonimmediate_operand") | |
12911 | (match_operand:SI 2 "const_0_to_255_operand") | |
12912 | (match_operand:V8HI 3 "register_operand") | |
12913 | (match_operand:QI 4 "register_operand")] | |
12914 | "TARGET_AVX512VL && TARGET_AVX512BW" | |
12915 | { | |
12916 | int mask = INTVAL (operands[2]); | |
12917 | emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1], | |
12918 | GEN_INT ((mask >> 0) & 3), | |
12919 | GEN_INT ((mask >> 2) & 3), | |
12920 | GEN_INT ((mask >> 4) & 3), | |
12921 | GEN_INT ((mask >> 6) & 3), | |
12922 | operands[3], operands[4])); | |
12923 | DONE; | |
12924 | }) | |
12925 | ||
;; Expander for the 128-bit pshuflw on V8HI.
;; Operand 2 is any const_int; only its low eight bits are used, as four
;; 2-bit fields (shifts 0, 2, 4, 6, each masked with 3), which are passed
;; as the four variable element indices to gen_sse2_pshuflw_1.
5802c0cb | 12926 | (define_expand "sse2_pshuflw" |
abd4f58b | 12927 | [(match_operand:V8HI 0 "register_operand") |
12928 | (match_operand:V8HI 1 "nonimmediate_operand") | |
12929 | (match_operand:SI 2 "const_int_operand")] | |
5802c0cb | 12930 | "TARGET_SSE2" |
12931 | { | |
12932 | int mask = INTVAL (operands[2]); | |
12933 | emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1], | |
12934 | GEN_INT ((mask >> 0) & 3), | |
12935 | GEN_INT ((mask >> 2) & 3), | |
12936 | GEN_INT ((mask >> 4) & 3), | |
12937 | GEN_INT ((mask >> 6) & 3))); | |
12938 | DONE; | |
12939 | }) | |
12940 | ||
;; 128-bit pshuflw, written as an eight-element vec_select from the V8HI
;; operand 1.  Result positions 4-7 use the fixed indices 4..7; positions
;; 0-3 use operands 2-5 (0..3 per their predicates).
;; The insn condition is TARGET_SSE2 plus the <mask_avx512bw_condition>
;; and <mask_avx512vl_condition> substitutions (defined outside this
;; section).
;; The output code packs operands 2-5 into 2-bit fields of the immediate
;; (bit positions 0, 2, 4, 6), stores it in operands[2] and returns the
;; assembler template.
;; The prefix attribute uses <mask_prefix2>, as sse2_pshufd_1<mask_name>
;; does, instead of a fixed "maybe_vex", so the masked variant generated
;; through <mask_name> gets its own prefix value.
;; NOTE(review): <mask_prefix2> is defined outside this section; confirm
;; that it yields "maybe_vex" unmasked and "evex" masked.
7b988cc3 | 12941 | (define_insn "sse2_pshuflw_1<mask_name>" |
12942 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
5802c0cb | 12943 | (vec_select:V8HI |
7b988cc3 | 12944 | (match_operand:V8HI 1 "nonimmediate_operand" "vm") |
abd4f58b | 12945 | (parallel [(match_operand 2 "const_0_to_3_operand") |
12946 | (match_operand 3 "const_0_to_3_operand") | |
12947 | (match_operand 4 "const_0_to_3_operand") | |
12948 | (match_operand 5 "const_0_to_3_operand") | |
5802c0cb | 12949 | (const_int 4) |
12950 | (const_int 5) | |
12951 | (const_int 6) | |
12952 | (const_int 7)])))] | |
7b988cc3 | 12953 | "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" |
5802c0cb | 12954 | { |
12955 | int mask = 0; | |
12956 | mask |= INTVAL (operands[2]) << 0; | |
12957 | mask |= INTVAL (operands[3]) << 2; | |
12958 | mask |= INTVAL (operands[4]) << 4; | |
12959 | mask |= INTVAL (operands[5]) << 6; | |
12960 | operands[2] = GEN_INT (mask); | |
12961 | ||
7b988cc3 | 12962 | return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
5802c0cb | 12963 | } |
12964 | [(set_attr "type" "sselog") | |
00a0e418 | 12965 | (set_attr "prefix_data16" "0") |
1f346cbc | 12966 | (set_attr "prefix_rep" "1") |
ed30e0a6 | 12967 | (set_attr "prefix" "<mask_prefix2>") |
00a0e418 | 12968 | (set_attr "length_immediate" "1") |
5802c0cb | 12969 | (set_attr "mode" "TI")]) |
12970 | ||
;; Expander for the unmasked 256-bit vpshufhw on V16HI.
;; Operand 2 is an immediate (0..255 per its predicate) read as four 2-bit
;; fields.  The four field values are emitted twice, with offsets 4 and
;; 12, giving the eight variable element indices passed to
;; gen_avx2_pshufhw_1.
5deb404d | 12971 | (define_expand "avx2_pshufhwv3" |
abd4f58b | 12972 | [(match_operand:V16HI 0 "register_operand") |
12973 | (match_operand:V16HI 1 "nonimmediate_operand") | |
12974 | (match_operand:SI 2 "const_0_to_255_operand")] | |
5deb404d | 12975 | "TARGET_AVX2" |
12976 | { | |
12977 | int mask = INTVAL (operands[2]); | |
12978 | emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1], | |
12979 | GEN_INT (((mask >> 0) & 3) + 4), | |
12980 | GEN_INT (((mask >> 2) & 3) + 4), | |
12981 | GEN_INT (((mask >> 4) & 3) + 4), | |
a9e4de7b | 12982 | GEN_INT (((mask >> 6) & 3) + 4), |
12983 | GEN_INT (((mask >> 0) & 3) + 12), | |
12984 | GEN_INT (((mask >> 2) & 3) + 12), | |
12985 | GEN_INT (((mask >> 4) & 3) + 12), | |
12986 | GEN_INT (((mask >> 6) & 3) + 12))); | |
5deb404d | 12987 | DONE; |
12988 | }) | |
12989 | ||
;; 512-bit vpshufhw on V32HI, modelled as an unspec (UNSPEC_PSHUFHW) of the
;; source vector (operand 1) and an immediate (operand 2, 0..255 per its
;; predicate).  Enabled for TARGET_AVX512BW.
;; length_immediate is set to 1, as in the other vpshufhw patterns of this
;; section, because the template emits operand 2 as an immediate.
7b988cc3 | 12990 | (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>" |
12991 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
12992 | (unspec:V32HI | |
12993 | [(match_operand:V32HI 1 "nonimmediate_operand" "vm") | |
12994 | (match_operand:SI 2 "const_0_to_255_operand" "n")] | |
12995 | UNSPEC_PSHUFHW))] | |
12996 | "TARGET_AVX512BW" | |
12997 | "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
12998 | [(set_attr "type" "sselog") | |
12999 | (set_attr "prefix" "evex") (set_attr "length_immediate" "1") | |
13000 | (set_attr "mode" "XI")]) | |
13001 | ||
;; Expander for the masked 256-bit vpshufhw on V16HI.
;; Operand 2 is an immediate (0..255 per its predicate) read as four 2-bit
;; fields.  The four field values are emitted twice, with offsets 4 and
;; 12, giving the eight variable element indices passed to
;; gen_avx2_pshufhw_1_mask together with operands 0, 1, 3 and 4.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
13002 | (define_expand "avx512vl_pshufhwv3_mask" | |
13003 | [(match_operand:V16HI 0 "register_operand") | |
13004 | (match_operand:V16HI 1 "nonimmediate_operand") | |
13005 | (match_operand:SI 2 "const_0_to_255_operand") | |
13006 | (match_operand:V16HI 3 "register_operand") | |
13007 | (match_operand:HI 4 "register_operand")] | |
13008 | "TARGET_AVX512VL && TARGET_AVX512BW" | |
13009 | { | |
13010 | int mask = INTVAL (operands[2]); | |
13011 | emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1], | |
13012 | GEN_INT (((mask >> 0) & 3) + 4), | |
13013 | GEN_INT (((mask >> 2) & 3) + 4), | |
13014 | GEN_INT (((mask >> 4) & 3) + 4), | |
13015 | GEN_INT (((mask >> 6) & 3) + 4), | |
13016 | GEN_INT (((mask >> 0) & 3) + 12), | |
13017 | GEN_INT (((mask >> 2) & 3) + 12), | |
13018 | GEN_INT (((mask >> 4) & 3) + 12), | |
13019 | GEN_INT (((mask >> 6) & 3) + 12), | |
13020 | operands[3], operands[4])); | |
13021 | DONE; | |
13022 | }) | |
13023 | ||
;; 256-bit vpshufhw, written as a sixteen-element vec_select from the V16HI
;; operand 1.  Result positions 0-3 and 8-11 use the fixed indices 0..3
;; and 8..11; positions 4-7 use operands 2-5 (4..7 per their predicates)
;; and positions 12-15 use operands 6-9 (12..15 per their predicates).
;; The insn condition requires TARGET_AVX2, the <mask_avx512bw_condition>
;; and <mask_avx512vl_condition> substitutions (defined outside this
;; section) and that operands 6-9 equal operands 2-5 plus 8.
;; The output code subtracts 4 from each of operands 2-5, packs the results
;; into 2-bit fields of the immediate (bit positions 0, 2, 4, 6), stores it
;; in operands[2] and returns the assembler template.
13024 | (define_insn "avx2_pshufhw_1<mask_name>" | |
13025 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
5deb404d | 13026 | (vec_select:V16HI |
7b988cc3 | 13027 | (match_operand:V16HI 1 "nonimmediate_operand" "vm") |
5deb404d | 13028 | (parallel [(const_int 0) |
13029 | (const_int 1) | |
13030 | (const_int 2) | |
13031 | (const_int 3) | |
abd4f58b | 13032 | (match_operand 2 "const_4_to_7_operand") |
13033 | (match_operand 3 "const_4_to_7_operand") | |
13034 | (match_operand 4 "const_4_to_7_operand") | |
13035 | (match_operand 5 "const_4_to_7_operand") | |
5deb404d | 13036 | (const_int 8) |
13037 | (const_int 9) | |
13038 | (const_int 10) | |
13039 | (const_int 11) | |
abd4f58b | 13040 | (match_operand 6 "const_12_to_15_operand") |
13041 | (match_operand 7 "const_12_to_15_operand") | |
13042 | (match_operand 8 "const_12_to_15_operand") | |
13043 | (match_operand 9 "const_12_to_15_operand")])))] | |
a9e4de7b | 13044 | "TARGET_AVX2 |
7b988cc3 | 13045 | && <mask_avx512bw_condition> && <mask_avx512vl_condition> |
a9e4de7b | 13046 | && INTVAL (operands[2]) + 8 == INTVAL (operands[6]) |
13047 | && INTVAL (operands[3]) + 8 == INTVAL (operands[7]) | |
13048 | && INTVAL (operands[4]) + 8 == INTVAL (operands[8]) | |
13049 | && INTVAL (operands[5]) + 8 == INTVAL (operands[9])" | |
5deb404d | 13050 | { |
13051 | int mask = 0; | |
13052 | mask |= (INTVAL (operands[2]) - 4) << 0; | |
13053 | mask |= (INTVAL (operands[3]) - 4) << 2; | |
13054 | mask |= (INTVAL (operands[4]) - 4) << 4; | |
13055 | mask |= (INTVAL (operands[5]) - 4) << 6; | |
13056 | operands[2] = GEN_INT (mask); | |
13057 | ||
7b988cc3 | 13058 | return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; |
5deb404d | 13059 | } |
13060 | [(set_attr "type" "sselog") | |
7b988cc3 | 13061 | (set_attr "prefix" "maybe_evex") |
5deb404d | 13062 | (set_attr "length_immediate" "1") |
13063 | (set_attr "mode" "OI")]) | |
13064 | ||
;; Expander for the masked 128-bit vpshufhw on V8HI.
;; Operand 2 is an immediate (0..255 per its predicate) split into four
;; 2-bit fields; each field plus 4 is passed as a variable element index
;; to gen_sse2_pshufhw_1_mask together with operands 0, 1, 3 and 4.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write-mask register; the pattern consuming them is generated elsewhere.
7b988cc3 | 13065 | (define_expand "avx512vl_pshufhw_mask" |
13066 | [(match_operand:V8HI 0 "register_operand") | |
13067 | (match_operand:V8HI 1 "nonimmediate_operand") | |
13068 | (match_operand:SI 2 "const_0_to_255_operand") | |
13069 | (match_operand:V8HI 3 "register_operand") | |
13070 | (match_operand:QI 4 "register_operand")] | |
13071 | "TARGET_AVX512VL && TARGET_AVX512BW" | |
13072 | { | |
13073 | int mask = INTVAL (operands[2]); | |
13074 | emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1], | |
13075 | GEN_INT (((mask >> 0) & 3) + 4), | |
13076 | GEN_INT (((mask >> 2) & 3) + 4), | |
13077 | GEN_INT (((mask >> 4) & 3) + 4), | |
13078 | GEN_INT (((mask >> 6) & 3) + 4), | |
13079 | operands[3], operands[4])); | |
13080 | DONE; | |
13081 | }) | |
13082 | ||
;; Expander for the 128-bit pshufhw on V8HI.
;; Operand 2 is any const_int; only its low eight bits are used, as four
;; 2-bit fields (shifts 0, 2, 4, 6, each masked with 3).  Each field plus 4
;; is passed as a variable element index to gen_sse2_pshufhw_1.
5802c0cb | 13083 | (define_expand "sse2_pshufhw" |
abd4f58b | 13084 | [(match_operand:V8HI 0 "register_operand") |
13085 | (match_operand:V8HI 1 "nonimmediate_operand") | |
13086 | (match_operand:SI 2 "const_int_operand")] | |
5802c0cb | 13087 | "TARGET_SSE2" |
13088 | { | |
13089 | int mask = INTVAL (operands[2]); | |
13090 | emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1], | |
13091 | GEN_INT (((mask >> 0) & 3) + 4), | |
13092 | GEN_INT (((mask >> 2) & 3) + 4), | |
13093 | GEN_INT (((mask >> 4) & 3) + 4), | |
13094 | GEN_INT (((mask >> 6) & 3) + 4))); | |
13095 | DONE; | |
13096 | }) | |
13097 | ||
;; 128-bit pshufhw, written as an eight-element vec_select from the V8HI
;; operand 1.  Result positions 0-3 use the fixed indices 0..3; positions
;; 4-7 use operands 2-5 (4..7 per their predicates).
;; The insn condition is TARGET_SSE2 plus the <mask_avx512bw_condition>
;; and <mask_avx512vl_condition> substitutions (defined outside this
;; section).
;; The output code subtracts 4 from each of operands 2-5, packs the results
;; into 2-bit fields of the immediate (bit positions 0, 2, 4, 6), stores it
;; in operands[2] and returns the assembler template.
;; The prefix attribute uses <mask_prefix2>, as sse2_pshufd_1<mask_name>
;; does, instead of a fixed "maybe_vex", so the masked variant generated
;; through <mask_name> gets its own prefix value.
;; NOTE(review): <mask_prefix2> is defined outside this section; confirm
;; that it yields "maybe_vex" unmasked and "evex" masked.
7b988cc3 | 13098 | (define_insn "sse2_pshufhw_1<mask_name>" |
13099 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
5802c0cb | 13100 | (vec_select:V8HI |
7b988cc3 | 13101 | (match_operand:V8HI 1 "nonimmediate_operand" "vm") |
5802c0cb | 13102 | (parallel [(const_int 0) |
13103 | (const_int 1) | |
13104 | (const_int 2) | |
13105 | (const_int 3) | |
abd4f58b | 13106 | (match_operand 2 "const_4_to_7_operand") |
13107 | (match_operand 3 "const_4_to_7_operand") | |
13108 | (match_operand 4 "const_4_to_7_operand") | |
13109 | (match_operand 5 "const_4_to_7_operand")])))] | |
7b988cc3 | 13110 | "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" |
5802c0cb | 13111 | { |
13112 | int mask = 0; | |
13113 | mask |= (INTVAL (operands[2]) - 4) << 0; | |
13114 | mask |= (INTVAL (operands[3]) - 4) << 2; | |
13115 | mask |= (INTVAL (operands[4]) - 4) << 4; | |
13116 | mask |= (INTVAL (operands[5]) - 4) << 6; | |
13117 | operands[2] = GEN_INT (mask); | |
13118 | ||
7b988cc3 | 13119 | return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
5802c0cb | 13120 | } |
13121 | [(set_attr "type" "sselog") | |
1f346cbc | 13122 | (set_attr "prefix_rep" "1") |
00a0e418 | 13123 | (set_attr "prefix_data16" "0") |
ed30e0a6 | 13124 | (set_attr "prefix" "<mask_prefix2>") |
00a0e418 | 13125 | (set_attr "length_immediate" "1") |
5802c0cb | 13126 | (set_attr "mode" "TI")]) |
13127 | ||
;; Expander: build a V4SI whose element 0 is the SImode operand 1 (merge | ||
;; mask 1 selects only element 0 from the duplicate) and whose other | ||
;; elements come from operand 2, which the preparation statement sets to | ||
;; the V4SI zero vector. | ||
13128 | (define_expand "sse2_loadd" | |
abd4f58b | 13129 | [(set (match_operand:V4SI 0 "register_operand") |
5802c0cb | 13130 | (vec_merge:V4SI |
13131 | (vec_duplicate:V4SI | |
abd4f58b | 13132 | (match_operand:SI 1 "nonimmediate_operand")) |
5802c0cb | 13133 | (match_dup 2) |
13134 | (const_int 1)))] | |
ad2c46cf | 13135 | "TARGET_SSE" |
5802c0cb | 13136 | "operands[2] = CONST0_RTX (V4SImode);") |
13137 | ||
;; Insert the SImode operand 2 into element 0 of a V4SI; the remaining | ||
;; elements come from operand 1.  In alternatives 0-2 operand 1 must | ||
;; satisfy constraint "C" and the scalar is loaded with movd or movss; | ||
;; alternatives 3-4 merge into an existing register with movss/vmovss. | ||
;; The "isa" attribute restricts the movd alternatives to sse2 and picks | ||
;; between the non-AVX and AVX register-merge forms. | ||
13138 | (define_insn "sse2_loadld" | |
f30b3ad6 | 13139 | [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x") |
5802c0cb | 13140 | (vec_merge:V4SI |
13141 | (vec_duplicate:V4SI | |
f30b3ad6 | 13142 | (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x")) |
13143 | (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x") | |
5802c0cb | 13144 | (const_int 1)))] |
ad2c46cf | 13145 | "TARGET_SSE" |
5802c0cb | 13146 | "@ |
b11a97b3 | 13147 | %vmovd\t{%2, %0|%0, %2} |
13148 | %vmovd\t{%2, %0|%0, %2} | |
ad2c46cf | 13149 | movss\t{%2, %0|%0, %2} |
b11a97b3 | 13150 | movss\t{%2, %0|%0, %2} |
13151 | vmovss\t{%2, %1, %0|%0, %1, %2}" | |
dea561ed | 13152 | [(set_attr "isa" "sse2,sse2,noavx,noavx,avx") |
b11a97b3 | 13153 | (set_attr "type" "ssemov") |
13154 | (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex") | |
13155 | (set_attr "mode" "TI,TI,V4SF,SF,SF")]) | |
5802c0cb | 13156 | |
7d9c40e2 | 13157 | ;; QI and HI modes handled by pextr patterns. |
;; V16QI is only enabled under TARGET_SSE4_1; V8HI has no extra condition | ||
;; beyond the one on the pattern that uses this iterator. | ||
13158 | (define_mode_iterator PEXTR_MODE12 | |
13159 | [(V16QI "TARGET_SSE4_1") V8HI]) | |
13160 | ||
;; Extract one element of a V16QI/V8HI register with pextrb/pextrw. | ||
;; Alternative 0 writes a general register, alternative 1 (sse4 only) | ||
;; stores directly to memory.  The two alternatives need separate | ||
;; templates: %k0 is right for the register destination, but on a memory | ||
;; destination it would force a DWORD size override in Intel syntax for | ||
;; what is a byte or word store, so the memory form prints plain %0. | ||
1087c60b | 13161 | (define_insn "*vec_extract<mode>" |
7d9c40e2 | 13162 | [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m") |
1087c60b | 13163 | (vec_select:<ssescalarmode> |
7d9c40e2 | 13164 | (match_operand:PEXTR_MODE12 1 "register_operand" "x,x") |
1087c60b | 13165 | (parallel |
13166 | [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))] | |
7d9c40e2 | 13167 | "TARGET_SSE2" |
13168 | "@ | |
%vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2} | ||
%vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | ||
13169 | [(set_attr "isa" "*,sse4") | |
13170 | (set_attr "type" "sselog1") | |
13171 | (set_attr "prefix_data16" "1") | |
1087c60b | 13172 | (set (attr "prefix_extra") |
13173 | (if_then_else | |
13174 | (and (eq_attr "alternative" "0") | |
13175 | (eq (const_string "<MODE>mode") (const_string "V8HImode"))) | |
13176 | (const_string "*") | |
13177 | (const_string "1"))) | |
13178 | (set_attr "length_immediate" "1") | |
13179 | (set_attr "prefix" "maybe_vex") | |
13180 | (set_attr "mode" "TI")]) | |
13181 | ||
;; Element extraction from a V16QI/V8HI register with the scalar result | ||
;; zero-extended into an SWI48 general register.  Only a register | ||
;; destination is accepted, and it is printed with the %k modifier. | ||
;; prefix_extra is "*" for V8HI and "1" otherwise. | ||
7d9c40e2 | 13182 | (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext" |
1087c60b | 13183 | [(set (match_operand:SWI48 0 "register_operand" "=r") |
13184 | (zero_extend:SWI48 | |
7d9c40e2 | 13185 | (vec_select:<PEXTR_MODE12:ssescalarmode> |
13186 | (match_operand:PEXTR_MODE12 1 "register_operand" "x") | |
1087c60b | 13187 | (parallel |
7d9c40e2 | 13188 | [(match_operand:SI 2 |
13189 | "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))] | |
1087c60b | 13190 | "TARGET_SSE2" |
7d9c40e2 | 13191 | "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}" |
1087c60b | 13192 | [(set_attr "type" "sselog1") |
13193 | (set_attr "prefix_data16" "1") | |
7d9c40e2 | 13194 | (set (attr "prefix_extra") |
13195 | (if_then_else | |
13196 | (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode")) | |
13197 | (const_string "*") | |
13198 | (const_string "1"))) | |
1087c60b | 13199 | (set_attr "length_immediate" "1") |
13200 | (set_attr "prefix" "maybe_vex") | |
13201 | (set_attr "mode" "TI")]) | |
13202 | ||
;; Element extraction from a VI12_128 vector held in offsettable memory | ||
;; (constraint "o") into a general register.  The "#" template means no | ||
;; assembly is emitted for this pattern directly; it has to be split. | ||
27fc86e0 | 13203 | (define_insn "*vec_extract<mode>_mem" |
1087c60b | 13204 | [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r") |
13205 | (vec_select:<ssescalarmode> | |
27fc86e0 | 13206 | (match_operand:VI12_128 1 "memory_operand" "o") |
1087c60b | 13207 | (parallel |
13208 | [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))] | |
13209 | "TARGET_SSE" | |
27fc86e0 | 13210 | "#") |
1087c60b | 13211 | |
;; Extraction of element 0 into an SWI48 destination.  Memory-to-memory | ||
;; is rejected by the insn condition, alternative 1 is limited to sse4 by | ||
;; the "isa" attribute, and the "#" template means the insn is always | ||
;; split rather than printed. | ||
fe4df2ce | 13212 | (define_insn "*vec_extract<ssevecmodelower>_0" |
1087c60b | 13213 | [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m") |
fe4df2ce | 13214 | (vec_select:SWI48 |
1087c60b | 13215 | (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x") |
5802c0cb | 13216 | (parallel [(const_int 0)])))] |
fe4df2ce | 13217 | "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
2071a184 | 13218 | "#" |
1087c60b | 13219 | [(set_attr "isa" "*,sse4,*,*")]) |
5802c0cb | 13220 | |
;; Zero-extended extraction of element 0 of a V4SI register into a DImode | ||
;; general register (64-bit targets with inter-unit moves from vector | ||
;; registers).  After reload it is split into a zero_extend of the SImode | ||
;; view of the same hard register number. | ||
b8403b2e | 13221 | (define_insn_and_split "*vec_extractv4si_0_zext" |
13222 | [(set (match_operand:DI 0 "register_operand" "=r") | |
13223 | (zero_extend:DI | |
13224 | (vec_select:SI | |
13225 | (match_operand:V4SI 1 "register_operand" "x") | |
13226 | (parallel [(const_int 0)]))))] | |
13227 | "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC" | |
13228 | "#" | |
13229 | "&& reload_completed" | |
13230 | [(set (match_dup 0) (zero_extend:DI (match_dup 1)))] | |
13231 | "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));") | |
13232 | ||
;; Extraction of element 0 of a V2DI on non-64-bit targets, where the | ||
;; DImode destination is an SSE register or memory.  Memory-to-memory is | ||
;; rejected and the "#" template requires the insn to be split. | ||
fe4df2ce | 13233 | (define_insn "*vec_extractv2di_0_sse" |
13234 | [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m") | |
f957796f | 13235 | (vec_select:DI |
fe4df2ce | 13236 | (match_operand:V2DI 1 "nonimmediate_operand" "xm,x") |
f957796f | 13237 | (parallel [(const_int 0)])))] |
fe4df2ce | 13238 | "TARGET_SSE && !TARGET_64BIT |
13239 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
13240 | "#") | |
f957796f | 13241 | |
;; After reload, extracting element 0 from a vector register is just a | ||
;; scalar move: operand 1 is replaced by a register rtx in the scalar | ||
;; mode with the same register number. | ||
fe4df2ce | 13242 | (define_split |
2071a184 | 13243 | [(set (match_operand:SWI48x 0 "nonimmediate_operand") |
fe4df2ce | 13244 | (vec_select:SWI48x |
27fc86e0 | 13245 | (match_operand:<ssevecmode> 1 "register_operand") |
5802c0cb | 13246 | (parallel [(const_int 0)])))] |
fe4df2ce | 13247 | "TARGET_SSE && reload_completed" |
13248 | [(set (match_dup 0) (match_dup 1))] | |
27fc86e0 | 13249 | "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));") |
5802c0cb | 13250 | |
;; SSE4.1 extraction of an arbitrary V4SI element.  Alternative 0 uses | ||
;; pextrd into a register or memory.  Alternatives 1-3 leave the result | ||
;; in an SSE register by shifting the source right by 4 * index bytes; | ||
;; operands[2] is rewritten to that byte count before printing.  The | ||
;; non-AVX shift alternatives require the destination to match operand 1. | ||
2071a184 | 13251 | (define_insn "*vec_extractv4si" |
0a32b282 | 13252 | [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x") |
2071a184 | 13253 | (vec_select:SI |
0a32b282 | 13254 | (match_operand:V4SI 1 "register_operand" "x,0,0,x") |
2071a184 | 13255 | (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] |
13256 | "TARGET_SSE4_1" | |
b8403b2e | 13257 | { |
13258 | switch (which_alternative) | |
13259 | { | |
13260 | case 0: | |
13261 | return "%vpextrd\t{%2, %1, %0|%0, %1, %2}"; | |
13262 | ||
13263 | case 1: | |
0a32b282 | 13264 | case 2: |
b8403b2e | 13265 | operands [2] = GEN_INT (INTVAL (operands[2]) * 4); |
13266 | return "psrldq\t{%2, %0|%0, %2}"; | |
13267 | ||
0a32b282 | 13268 | case 3: |
b8403b2e | 13269 | operands [2] = GEN_INT (INTVAL (operands[2]) * 4); |
13270 | return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; | |
13271 | ||
13272 | default: | |
13273 | gcc_unreachable (); | |
13274 | } | |
13275 | } | |
0a32b282 | 13276 | [(set_attr "isa" "*,noavx,noavx,avx") |
13277 | (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1") | |
13278 | (set_attr "prefix_extra" "1,*,*,*") | |
2071a184 | 13279 | (set_attr "length_immediate" "1") |
0a32b282 | 13280 | (set_attr "prefix" "maybe_vex,orig,orig,vex") |
2071a184 | 13281 | (set_attr "mode" "TI")]) |
13282 | ||
;; 64-bit SSE4.1 variant: pextrd of a V4SI element with the SImode result | ||
;; zero-extended to a DImode general register.  The destination is | ||
;; printed with the %k modifier. | ||
13283 | (define_insn "*vec_extractv4si_zext" | |
13284 | [(set (match_operand:DI 0 "register_operand" "=r") | |
13285 | (zero_extend:DI | |
13286 | (vec_select:SI | |
13287 | (match_operand:V4SI 1 "register_operand" "x") | |
13288 | (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))] | |
13289 | "TARGET_64BIT && TARGET_SSE4_1" | |
13290 | "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" | |
13291 | [(set_attr "type" "sselog1") | |
13292 | (set_attr "prefix_extra" "1") | |
13293 | (set_attr "length_immediate" "1") | |
13294 | (set_attr "prefix" "maybe_vex") | |
13295 | (set_attr "mode" "TI")]) | |
fe4df2ce | 13296 | |
;; Extraction of a V4SI element from offsettable memory into an SSE or | ||
;; general register.  Template "#": always split, never printed. | ||
27fc86e0 | 13297 | (define_insn "*vec_extractv4si_mem" |
fe4df2ce | 13298 | [(set (match_operand:SI 0 "register_operand" "=x,r") |
13299 | (vec_select:SI | |
13300 | (match_operand:V4SI 1 "memory_operand" "o,o") | |
13301 | (parallel [(match_operand 2 "const_0_to_3_operand")])))] | |
13302 | "TARGET_SSE" | |
27fc86e0 | 13303 | "#") |
5802c0cb | 13304 | |
;; Zero-extended extraction of a V4SI element from offsettable memory on | ||
;; 64-bit targets.  After reload the vector memory reference is narrowed | ||
;; to an SImode reference at byte offset 4 * index, leaving a plain | ||
;; zero-extending load. | ||
b8403b2e | 13305 | (define_insn_and_split "*vec_extractv4si_zext_mem" |
13306 | [(set (match_operand:DI 0 "register_operand" "=x,r") | |
13307 | (zero_extend:DI | |
13308 | (vec_select:SI | |
13309 | (match_operand:V4SI 1 "memory_operand" "o,o") | |
13310 | (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))] | |
13311 | "TARGET_64BIT && TARGET_SSE" | |
13312 | "#" | |
13313 | "&& reload_completed" | |
13314 | [(set (match_dup 0) (zero_extend:DI (match_dup 1)))] | |
13315 | { | |
13316 | operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4); | |
13317 | }) | |
13318 | ||
;; Extraction of element 1 (the high quadword) of a V2DI.  Seven | ||
;; alternatives: pextrq, a movhps store, an in-place or three-operand | ||
;; 8-byte right shift, movhlps, and two "#" alternatives for an | ||
;; offsettable memory source that are left to be split.  The "isa" | ||
;; attribute gates each alternative; memory-to-memory is rejected. | ||
1e541240 | 13319 | (define_insn "*vec_extractv2di_1" |
2071a184 | 13320 | [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r") |
a150ee15 | 13321 | (vec_select:DI |
2071a184 | 13322 | (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o") |
a150ee15 | 13323 | (parallel [(const_int 1)])))] |
fe4df2ce | 13324 | "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
a150ee15 | 13325 | "@ |
2071a184 | 13326 | %vpextrq\t{$1, %1, %0|%0, %1, 1} |
b11a97b3 | 13327 | %vmovhps\t{%1, %0|%0, %1} |
c5823964 | 13328 | psrldq\t{$8, %0|%0, 8} |
b11a97b3 | 13329 | vpsrldq\t{$8, %1, %0|%0, %1, 8} |
daa8e621 | 13330 | movhlps\t{%1, %0|%0, %1} |
fe4df2ce | 13331 | # |
13332 | #" | |
2071a184 | 13333 | [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64") |
13334 | (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov") | |
13335 | (set_attr "length_immediate" "1,*,1,1,*,*,*") | |
2071a184 | 13336 | (set_attr "prefix_rex" "1,*,*,*,*,*,*") |
13337 | (set_attr "prefix_extra" "1,*,*,*,*,*,*") | |
13338 | (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*") | |
13339 | (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")]) | |
fe4df2ce | 13340 | |
;; After reload, extracting an element of a 128-bit integer vector that | ||
;; lives in memory becomes a scalar load: the memory reference is | ||
;; narrowed to the element mode at byte offset index * element size. | ||
13341 | (define_split | |
27fc86e0 | 13342 | [(set (match_operand:<ssescalarmode> 0 "register_operand") |
13343 | (vec_select:<ssescalarmode> | |
13344 | (match_operand:VI_128 1 "memory_operand") | |
13345 | (parallel | |
13346 | [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))] | |
fe4df2ce | 13347 | "TARGET_SSE && reload_completed" |
13348 | [(set (match_dup 0) (match_dup 1))] | |
27fc86e0 | 13349 | { |
13350 | int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode); | |
13351 | ||
13352 | operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs); | |
13353 | }) | |
a150ee15 | 13354 | |
f02daedb | 13355 | ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F |
13356 | ;; vector modes into vec_extract*. | |
;; Applies only while pseudos can still be created, to a byte-0 subreg of | ||
;; a 16-, 32- (TARGET_AVX) or 64-byte (TARGET_AVX512F) vector register. | ||
;; The preparation code reduces wider sources to 16 bytes by extracting | ||
;; the low half into a fresh pseudo: case 64 falls through into case 32, | ||
;; so a 64-byte source is halved twice.  A 16-byte source is only | ||
;; re-viewed in <ssevecmode> with gen_lowpart. | ||
13357 | (define_split | |
13358 | [(set (match_operand:SWI48x 0 "nonimmediate_operand") | |
13359 | (match_operand:SWI48x 1 "register_operand"))] | |
13360 | "can_create_pseudo_p () | |
e15c0942 | 13361 | && SUBREG_P (operands[1]) |
f02daedb | 13362 | && REG_P (SUBREG_REG (operands[1])) |
13363 | && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT | |
13364 | || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) | |
13365 | == MODE_VECTOR_FLOAT)) | |
13366 | && SUBREG_BYTE (operands[1]) == 0 | |
13367 | && TARGET_SSE | |
13368 | && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16 | |
13369 | || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32 | |
13370 | && TARGET_AVX) | |
13371 | || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64 | |
13372 | && TARGET_AVX512F)) | |
13373 | && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))" | |
13374 | [(set (match_dup 0) (vec_select:SWI48x (match_dup 1) | |
13375 | (parallel [(const_int 0)])))] | |
13376 | { | |
13377 | rtx tmp; | |
13378 | operands[1] = SUBREG_REG (operands[1]); | |
13379 | switch (GET_MODE_SIZE (GET_MODE (operands[1]))) | |
13380 | { | |
13381 | case 64: | |
13382 | if (<MODE>mode == SImode) | |
13383 | { | |
13384 | tmp = gen_reg_rtx (V8SImode); | |
13385 | emit_insn (gen_vec_extract_lo_v16si (tmp, | |
13386 | gen_lowpart (V16SImode, | |
13387 | operands[1]))); | |
13388 | } | |
13389 | else | |
13390 | { | |
13391 | tmp = gen_reg_rtx (V4DImode); | |
13392 | emit_insn (gen_vec_extract_lo_v8di (tmp, | |
13393 | gen_lowpart (V8DImode, | |
13394 | operands[1]))); | |
13395 | } | |
13396 | operands[1] = tmp; | |
13397 | /* FALLTHRU */ | |
13398 | case 32: | |
13399 | tmp = gen_reg_rtx (<ssevecmode>mode); | |
13400 | if (<MODE>mode == SImode) | |
13401 | emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode, | |
13402 | operands[1]))); | |
13403 | else | |
13404 | emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode, | |
13405 | operands[1]))); | |
13406 | operands[1] = tmp; | |
13407 | break; | |
13408 | case 16: | |
13409 | operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]); | |
13410 | break; | |
13411 | } | |
13412 | }) | |
13413 | ||
;; SSE4.1 concatenation of two SImode values into a V2SI.  Nine | ||
;; alternatives: pinsrd (0-2), punpckldq on SSE registers (3-5), movd | ||
;; when operand 2 matches constraint "C" (6), and two MMX-register forms | ||
;; (7-8).  The insn condition rejects two memory inputs. | ||
b4a46c88 | 13414 | (define_insn "*vec_concatv2si_sse4_1" |
0a281fd0 | 13415 | [(set (match_operand:V2SI 0 "register_operand" |
13416 | "=Yr,*x,x, Yr,*x,x, x, *y,*y") | |
b4a46c88 | 13417 | (vec_concat:V2SI |
0a281fd0 | 13418 | (match_operand:SI 1 "nonimmediate_operand" |
13419 | " 0, 0,x, 0,0, x,rm, 0,rm") | |
13420 | (match_operand:SI 2 "vector_move_operand" | |
13421 | " rm,rm,rm,Yr,*x,x, C,*ym, C")))] | |
13422 | "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | |
b4a46c88 | 13423 | "@ |
0a32b282 | 13424 | pinsrd\t{$1, %2, %0|%0, %2, 1} |
1e541240 | 13425 | pinsrd\t{$1, %2, %0|%0, %2, 1} |
13426 | vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1} | |
d3d9aac1 | 13427 | punpckldq\t{%2, %0|%0, %2} |
0a32b282 | 13428 | punpckldq\t{%2, %0|%0, %2} |
b11a97b3 | 13429 | vpunpckldq\t{%2, %1, %0|%0, %1, %2} |
13430 | %vmovd\t{%1, %0|%0, %1} | |
d3d9aac1 | 13431 | punpckldq\t{%2, %0|%0, %2} |
13432 | movd\t{%1, %0|%0, %1}" | |
0a32b282 | 13433 | [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") |
13434 | (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") | |
13435 | (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*") | |
13436 | (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*") | |
13437 | (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") | |
13438 | (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")]) | |
b4a46c88 | 13439 | |
ad2c46cf | 13440 | ;; ??? In theory we can match memory for the MMX alternative, but allowing |
13441 | ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE | |
13442 | ;; alternatives pretty much forces the MMX alternative to be chosen. | |
;; Pre-SSE4.1 concatenation of two SImode values into a V2SI (the insn | ||
;; condition excludes TARGET_SSE4_1).  Operand 2 is a register or matches | ||
;; constraint "C"; the first three alternatives are limited to sse2 by | ||
;; the "isa" attribute. | ||
65c52515 | 13443 | (define_insn "*vec_concatv2si" |
13444 | [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y") | |
ad2c46cf | 13445 | (vec_concat:V2SI |
65c52515 | 13446 | (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm") |
13447 | (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))] | |
13448 | "TARGET_SSE && !TARGET_SSE4_1" | |
ad2c46cf | 13449 | "@ |
13450 | punpckldq\t{%2, %0|%0, %2} | |
13451 | movd\t{%1, %0|%0, %1} | |
65c52515 | 13452 | movd\t{%1, %0|%0, %1} |
ad2c46cf | 13453 | unpcklps\t{%2, %0|%0, %2} |
13454 | movss\t{%1, %0|%0, %1} | |
13455 | punpckldq\t{%2, %0|%0, %2} | |
13456 | movd\t{%1, %0|%0, %1}" | |
65c52515 | 13457 | [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*") |
13458 | (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov") | |
13459 | (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")]) | |
ad2c46cf | 13460 | |
;; Concatenate two V2SI halves into a V4SI.  Register sources use | ||
;; punpcklqdq or movlhps; a memory second half is loaded with movhps, | ||
;; printed with the %q modifier in the Intel-syntax half of the template. | ||
18c3cd78 | 13461 | (define_insn "*vec_concatv4si" |
f30b3ad6 | 13462 | [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x") |
ed30e0a6 | 13463 | (vec_concat:V4SI |
f30b3ad6 | 13464 | (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x") |
13465 | (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))] | |
ad2c46cf | 13466 | "TARGET_SSE" |
13467 | "@ | |
13468 | punpcklqdq\t{%2, %0|%0, %2} | |
18c3cd78 | 13469 | vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} |
ad2c46cf | 13470 | movlhps\t{%2, %0|%0, %2} |
c358a059 | 13471 | movhps\t{%2, %0|%0, %q2} |
13472 | vmovhps\t{%2, %1, %0|%0, %1, %q2}" | |
f30b3ad6 | 13473 | [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx") |
18c3cd78 | 13474 | (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov") |
13475 | (set_attr "prefix" "orig,vex,orig,orig,vex") | |
13476 | (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")]) | |
ad2c46cf | 13477 | |
dd196988 | 13478 | ;; movd instead of movq is required to handle broken assemblers. |
;; Concatenate two DImode values into a V2DI; eleven alternatives. | ||
;; Alternative 3 (general register source, operand 2 matching "C") is the | ||
;; one that chooses between movq and movd at output time, depending on | ||
;; HAVE_AS_IX86_INTERUNIT_MOVQ.  The "type" attribute is sselog for | ||
;; alternatives 0, 1, 2, 6 and 7 and ssemov for the rest. | ||
65c52515 | 13479 | (define_insn "vec_concatv2di" |
b11a97b3 | 13480 | [(set (match_operand:V2DI 0 "register_operand" |
0a32b282 | 13481 | "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x") |
d3d9aac1 | 13482 | (vec_concat:V2DI |
b11a97b3 | 13483 | (match_operand:DI 1 "nonimmediate_operand" |
0a32b282 | 13484 | " 0, 0,x ,r ,xm,*y,0,x,0,0,x") |
b11a97b3 | 13485 | (match_operand:DI 2 "vector_move_operand" |
0a32b282 | 13486 | "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))] |
65c52515 | 13487 | "TARGET_SSE" |
d3d9aac1 | 13488 | "@ |
0a32b282 | 13489 | pinsrq\t{$1, %2, %0|%0, %2, 1} |
1e541240 | 13490 | pinsrq\t{$1, %2, %0|%0, %2, 1} |
13491 | vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1} | |
f17a6c34 | 13492 | * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\"; |
65c52515 | 13493 | %vmovq\t{%1, %0|%0, %1} |
d3d9aac1 | 13494 | movq2dq\t{%1, %0|%0, %1} |
13495 | punpcklqdq\t{%2, %0|%0, %2} | |
b11a97b3 | 13496 | vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} |
65c52515 | 13497 | movlhps\t{%2, %0|%0, %2} |
b11a97b3 | 13498 | movhps\t{%2, %0|%0, %2} |
13499 | vmovhps\t{%2, %1, %0|%0, %1, %2}" | |
0a32b282 | 13500 | [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx") |
1e541240 | 13501 | (set (attr "type") |
13502 | (if_then_else | |
0a32b282 | 13503 | (eq_attr "alternative" "0,1,2,6,7") |
1e541240 | 13504 | (const_string "sselog") |
13505 | (const_string "ssemov"))) | |
0a32b282 | 13506 | (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*") |
13507 | (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*") | |
13508 | (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*") | |
13509 | (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex") | |
13510 | (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) | |
b11a97b3 | 13511 | |
;; Unpack expander delegating to ix86_expand_sse_unpack with both flag | ||
;; arguments false (presumably unsigned_p and high_p, in that order, | ||
;; going by the sibling expanders' names -- confirm against the helper). | ||
b6fc7168 | 13512 | (define_expand "vec_unpacks_lo_<mode>" |
abd4f58b | 13513 | [(match_operand:<sseunpackmode> 0 "register_operand") |
8f83f53e | 13514 | (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] |
c6c91d61 | 13515 | "TARGET_SSE2" |
3b87d2ec | 13516 | "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;") |
c6c91d61 | 13517 | |
;; Unpack expander delegating to ix86_expand_sse_unpack with flag | ||
;; arguments (false, true). | ||
b6fc7168 | 13518 | (define_expand "vec_unpacks_hi_<mode>" |
abd4f58b | 13519 | [(match_operand:<sseunpackmode> 0 "register_operand") |
8f83f53e | 13520 | (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] |
c6c91d61 | 13521 | "TARGET_SSE2" |
3b87d2ec | 13522 | "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;") |
c6c91d61 | 13523 | |
;; Unpack expander delegating to ix86_expand_sse_unpack with flag | ||
;; arguments (true, false). | ||
b6fc7168 | 13524 | (define_expand "vec_unpacku_lo_<mode>" |
abd4f58b | 13525 | [(match_operand:<sseunpackmode> 0 "register_operand") |
8f83f53e | 13526 | (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] |
c6c91d61 | 13527 | "TARGET_SSE2" |
3b87d2ec | 13528 | "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") |
c6c91d61 | 13529 | |
;; Scalar-mode unpack: the QImode result is the byte-0 subreg of the | ||
;; HImode operand.  Enabled under TARGET_AVX512DQ. | ||
0852690b | 13530 | (define_expand "vec_unpacks_lo_hi" |
13531 | [(set (match_operand:QI 0 "register_operand") | |
13532 | (subreg:QI (match_operand:HI 1 "register_operand") 0))] | |
13533 | "TARGET_AVX512DQ") | |
13534 | ||
;; Scalar-mode unpack: the HImode result is the byte-0 subreg of the | ||
;; SImode operand.  Enabled under TARGET_AVX512F. | ||
13535 | (define_expand "vec_unpacks_lo_si" | |
13536 | [(set (match_operand:HI 0 "register_operand") | |
13537 | (subreg:HI (match_operand:SI 1 "register_operand") 0))] | |
13538 | "TARGET_AVX512F") | |
13539 | ||
;; Scalar-mode unpack: the SImode result is the byte-0 subreg of the | ||
;; DImode operand.  Enabled under TARGET_AVX512BW. | ||
13540 | (define_expand "vec_unpacks_lo_di" | |
13541 | [(set (match_operand:SI 0 "register_operand") | |
13542 | (subreg:SI (match_operand:DI 1 "register_operand") 0))] | |
13543 | "TARGET_AVX512BW") | |
13544 | ||
;; Unpack expander delegating to ix86_expand_sse_unpack with both flag | ||
;; arguments true. | ||
b6fc7168 | 13545 | (define_expand "vec_unpacku_hi_<mode>" |
abd4f58b | 13546 | [(match_operand:<sseunpackmode> 0 "register_operand") |
8f83f53e | 13547 | (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] |
c6c91d61 | 13548 | "TARGET_SSE2" |
3b87d2ec | 13549 | "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") |
c6c91d61 | 13550 | |
;; Scalar-mode high unpack: the HImode operand is shifted right logically | ||
;; by 8 and stored through an HImode subreg of the QImode destination. | ||
0852690b | 13551 | (define_expand "vec_unpacks_hi_hi" |
13552 | [(set (subreg:HI (match_operand:QI 0 "register_operand") 0) | |
13553 | (lshiftrt:HI (match_operand:HI 1 "register_operand") | |
13554 | (const_int 8)))] | |
13555 | "TARGET_AVX512F") | |
13556 | ||
;; High unpack for SWI48x modes: logical right shift of operand 1 by the | ||
;; bit size of <HALFMASKMODE>, written through a full-width subreg of the | ||
;; half-mode destination.  The shift count is filled in as operand 2. | ||
13557 | (define_expand "vec_unpacks_hi_<mode>" | |
13558 | [(set (subreg:SWI48x (match_operand:<HALFMASKMODE> 0 "register_operand") 0) | |
13559 | (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand") | |
13560 | (match_dup 2)))] | |
13561 | "TARGET_AVX512BW" | |
13562 | { | |
13563 | operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode)); | |
13564 | }) | |
13565 | ||
5802c0cb | 13566 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
13567 | ;; | |
0975351b | 13568 | ;; Miscellaneous |
5802c0cb | 13569 | ;; |
13570 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
13571 | ||
;; Rounded unsigned average: both inputs are zero-extended to the | ||
;; double-width mode, summed together with a constant, shifted right by | ||
;; one and truncated back.  The preparation code puts CONST1_RTX in | ||
;; operands[3] so ix86_fixup_binary_operands_no_copy sees a plain | ||
;; three-operand PLUS.  When a mask is applied, the original operands[3] | ||
;; is saved first and restored afterwards, and the constant is moved to | ||
;; operands[5]. | ||
293fd15f | 13572 | (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>" |
e4048f11 | 13573 | [(set (match_operand:VI12_AVX2 0 "register_operand") |
13574 | (truncate:VI12_AVX2 | |
13575 | (lshiftrt:<ssedoublemode> | |
13576 | (plus:<ssedoublemode> | |
13577 | (plus:<ssedoublemode> | |
13578 | (zero_extend:<ssedoublemode> | |
13579 | (match_operand:VI12_AVX2 1 "nonimmediate_operand")) | |
13580 | (zero_extend:<ssedoublemode> | |
13581 | (match_operand:VI12_AVX2 2 "nonimmediate_operand"))) | |
293fd15f | 13582 | (match_dup <mask_expand_op3>)) |
7c839b3f | 13583 | (const_int 1))))] |
293fd15f | 13584 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
e4048f11 | 13585 | { |
293fd15f | 13586 | rtx tmp; |
13587 | if (<mask_applied>) | |
13588 | tmp = operands[3]; | |
e4048f11 | 13589 | operands[3] = CONST1_RTX(<MODE>mode); |
13590 | ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands); | |
293fd15f | 13591 | |
13592 | if (<mask_applied>) | |
13593 | { | |
13594 | operands[5] = operands[3]; | |
13595 | operands[3] = tmp; | |
13596 | } | |
e4048f11 | 13597 | }) |
5deb404d | 13598 | |
;; Insn matching the rounded unsigned average RTL, with the added | ||
;; constant required to satisfy const1_operand.  Operand 1 is marked | ||
;; commutative ("%").  Alternative 0 is the two-operand pavg form, | ||
;; alternative 1 the three-operand vpavg form, which also carries the | ||
;; optional mask operand. | ||
293fd15f | 13599 | (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>" |
13600 | [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v") | |
e4048f11 | 13601 | (truncate:VI12_AVX2 |
13602 | (lshiftrt:<ssedoublemode> | |
13603 | (plus:<ssedoublemode> | |
13604 | (plus:<ssedoublemode> | |
13605 | (zero_extend:<ssedoublemode> | |
293fd15f | 13606 | (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v")) |
e4048f11 | 13607 | (zero_extend:<ssedoublemode> |
293fd15f | 13608 | (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm"))) |
13609 | (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand")) | |
5802c0cb | 13610 | (const_int 1))))] |
293fd15f | 13611 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition> |
13612 | && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" | |
908dc1fc | 13613 | "@ |
e4048f11 | 13614 | pavg<ssemodesuffix>\t{%2, %0|%0, %2} |
293fd15f | 13615 | vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
908dc1fc | 13616 | [(set_attr "isa" "noavx,avx") |
13617 | (set_attr "type" "sseiadd") | |
13618 | (set_attr "prefix_data16" "1,*") | |
293fd15f | 13619 | (set_attr "prefix" "orig,<mask_prefix>") |
e4048f11 | 13620 | (set_attr "mode" "<sseinsnmode>")]) |
5802c0cb | 13621 | |
009b318f | 13622 | ;; The correct representation for this is absolutely enormous, and |
5802c0cb | 13623 | ;; surely not generally useful. |
;; Hence psadbw is modelled as an opaque UNSPEC_PSADBW: two byte-vector | ||
;; inputs (<ssebytemode>) yielding a 64-bit-element vector result.  Note | ||
;; that operand 1 carries no "%" commutativity marker here. | ||
5deb404d | 13624 | (define_insn "<sse2_avx2>_psadbw" |
5f3ec3a3 | 13625 | [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v") |
13626 | (unspec:VI8_AVX2_AVX512BW | |
13627 | [(match_operand:<ssebytemode> 1 "register_operand" "0,v") | |
13628 | (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")] | |
fd65bafc | 13629 | UNSPEC_PSADBW))] |
5802c0cb | 13630 | "TARGET_SSE2" |
908dc1fc | 13631 | "@ |
13632 | psadbw\t{%2, %0|%0, %2} | |
13633 | vpsadbw\t{%2, %1, %0|%0, %1, %2}" | |
13634 | [(set_attr "isa" "noavx,avx") | |
13635 | (set_attr "type" "sseiadd") | |
fbfe006e | 13636 | (set_attr "atom_unit" "simul") |
908dc1fc | 13637 | (set_attr "prefix_data16" "1,*") |
5f3ec3a3 | 13638 | (set_attr "prefix" "orig,maybe_evex") |
5deb404d | 13639 | (set_attr "mode" "<sseinsnmode>")]) |
5802c0cb | 13640 | |
;; movmskps/movmskpd: UNSPEC_MOVMSK of a 128- or 256-bit float vector | ||
;; register, producing an SImode value in a general register. | ||
63d5e521 | 13641 | (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>" |
ed30e0a6 | 13642 | [(set (match_operand:SI 0 "register_operand" "=r") |
13643 | (unspec:SI | |
6a3f5f59 | 13644 | [(match_operand:VF_128_256 1 "register_operand" "x")] |
ed30e0a6 | 13645 | UNSPEC_MOVMSK))] |
6fe5844b | 13646 | "TARGET_SSE" |
0061967e | 13647 | "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}" |
fbfe006e | 13648 | [(set_attr "type" "ssemov") |
ed30e0a6 | 13649 | (set_attr "prefix" "maybe_vex") |
3da2a73c | 13650 | (set_attr "mode" "<MODE>")]) |
5802c0cb | 13651 | |
;; 64-bit variant of movmsk whose SImode result is zero-extended to | ||
;; DImode; the destination is printed with the %k modifier. | ||
b1d9adac | 13652 | (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext" |
13653 | [(set (match_operand:DI 0 "register_operand" "=r") | |
13654 | (zero_extend:DI | |
13655 | (unspec:SI | |
13656 | [(match_operand:VF_128_256 1 "register_operand" "x")] | |
13657 | UNSPEC_MOVMSK)))] | |
13658 | "TARGET_64BIT && TARGET_SSE" | |
13659 | "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}" | |
5deb404d | 13660 | [(set_attr "type" "ssemov") |
b1d9adac | 13661 | (set_attr "prefix" "maybe_vex") |
13662 | (set_attr "mode" "<MODE>")]) | |
5deb404d | 13663 | |
;; pmovmskb: UNSPEC_MOVMSK of a byte vector register into an SImode | ||
;; general register.  The prefix_data16 attribute is "*" when TARGET_AVX | ||
;; is set and "1" otherwise. | ||
b1d9adac | 13664 | (define_insn "<sse2_avx2>_pmovmskb" |
5802c0cb | 13665 | [(set (match_operand:SI 0 "register_operand" "=r") |
b1d9adac | 13666 | (unspec:SI |
13667 | [(match_operand:VI1_AVX2 1 "register_operand" "x")] | |
13668 | UNSPEC_MOVMSK))] | |
5802c0cb | 13669 | "TARGET_SSE2" |
ed30e0a6 | 13670 | "%vpmovmskb\t{%1, %0|%0, %1}" |
fbfe006e | 13671 | [(set_attr "type" "ssemov") |
b1d9adac | 13672 | (set (attr "prefix_data16") |
13673 | (if_then_else | |
13674 | (match_test "TARGET_AVX") | |
13675 | (const_string "*") | |
13676 | (const_string "1"))) | |
13677 | (set_attr "prefix" "maybe_vex") | |
13678 | (set_attr "mode" "SI")]) | |
13679 | ||
;; 64-bit variant of pmovmskb whose SImode result is zero-extended to | ||
;; DImode; the destination is printed with the %k modifier. | ||
13680 | (define_insn "*<sse2_avx2>_pmovmskb_zext" | |
13681 | [(set (match_operand:DI 0 "register_operand" "=r") | |
13682 | (zero_extend:DI | |
13683 | (unspec:SI | |
13684 | [(match_operand:VI1_AVX2 1 "register_operand" "x")] | |
13685 | UNSPEC_MOVMSK)))] | |
13686 | "TARGET_64BIT && TARGET_SSE2" | |
13687 | "%vpmovmskb\t{%1, %k0|%k0, %1}" | |
13688 | [(set_attr "type" "ssemov") | |
13689 | (set (attr "prefix_data16") | |
13690 | (if_then_else | |
13691 | (match_test "TARGET_AVX") | |
13692 | (const_string "*") | |
13693 | (const_string "1"))) | |
ed30e0a6 | 13694 | (set_attr "prefix" "maybe_vex") |
1f346cbc | 13695 | (set_attr "mode" "SI")]) |
5802c0cb | 13696 | |
;; Expander for maskmovdqu.  The destination memory appears again as an | ||
;; unspec input (match_dup 0), so the previous memory contents are an | ||
;; input of the operation alongside the data and mask registers. | ||
13697 | (define_expand "sse2_maskmovdqu" | |
abd4f58b | 13698 | [(set (match_operand:V16QI 0 "memory_operand") |
13699 | (unspec:V16QI [(match_operand:V16QI 1 "register_operand") | |
13700 | (match_operand:V16QI 2 "register_operand") | |
5802c0cb | 13701 | (match_dup 0)] |
13702 | UNSPEC_MASKMOV))] | |
5bd1ff1d | 13703 | "TARGET_SSE2") |
5802c0cb | 13704 | |
;; maskmovdqu insn.  The store address is operand 0, a Pmode register | ||
;; constrained to class "D"; it does not appear in the printed template. | ||
;; When Pmode differs from word_mode an "addr32" prefix is written to the | ||
;; asm file by hand, and length_address accounts for it. | ||
13705 | (define_insn "*sse2_maskmovdqu" | |
dcab66ec | 13706 | [(set (mem:V16QI (match_operand:P 0 "register_operand" "D")) |
5802c0cb | 13707 | (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") |
13708 | (match_operand:V16QI 2 "register_operand" "x") | |
13709 | (mem:V16QI (match_dup 0))] | |
13710 | UNSPEC_MASKMOV))] | |
dcab66ec | 13711 | "TARGET_SSE2" |
4a2a161f | 13712 | { |
13713 | /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing | |
13714 | that requires %v to be at the beginning of the opcode name. */ | |
13715 | if (Pmode != word_mode) | |
13716 | fputs ("\taddr32", asm_out_file); | |
13717 | return "%vmaskmovdqu\t{%2, %1|%1, %2}"; | |
13718 | } | |
fbfe006e | 13719 | [(set_attr "type" "ssemov") |
1f346cbc | 13720 | (set_attr "prefix_data16" "1") |
4a2a161f | 13721 | (set (attr "length_address") |
13722 | (symbol_ref ("Pmode != word_mode"))) | |
00a0e418 | 13723 | ;; The implicit %rdi operand confuses default length_vex computation. |
13724 | (set (attr "length_vex") | |
dcab66ec | 13725 | (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) |
ed30e0a6 | 13726 | (set_attr "prefix" "maybe_vex") |
4c9faaa4 | 13727 | (set_attr "znver1_decode" "vector") |
5802c0cb | 13728 | (set_attr "mode" "TI")]) |
13729 | ||
;; ldmxcsr from an SImode memory operand, modelled as an | ||
;; unspec_volatile; the "memory" attribute is "load". | ||
32513a88 | 13730 | (define_insn "sse_ldmxcsr" |
13731 | [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] | |
13732 | UNSPECV_LDMXCSR)] | |
13733 | "TARGET_SSE" | |
ed30e0a6 | 13734 | "%vldmxcsr\t%0" |
32513a88 | 13735 | [(set_attr "type" "sse") |
fbfe006e | 13736 | (set_attr "atom_sse_attr" "mxcsr") |
ed30e0a6 | 13737 | (set_attr "prefix" "maybe_vex") |
32513a88 | 13738 | (set_attr "memory" "load")]) |
13739 | ||
;; stmxcsr into an SImode memory operand; the stored value is an | ||
;; unspec_volatile and the "memory" attribute is "store". | ||
13740 | (define_insn "sse_stmxcsr" | |
13741 | [(set (match_operand:SI 0 "memory_operand" "=m") | |
13742 | (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] | |
13743 | "TARGET_SSE" | |
ed30e0a6 | 13744 | "%vstmxcsr\t%0" |
32513a88 | 13745 | [(set_attr "type" "sse") |
fbfe006e | 13746 | (set_attr "atom_sse_attr" "mxcsr") |
ed30e0a6 | 13747 | (set_attr "prefix" "maybe_vex") |
32513a88 | 13748 | (set_attr "memory" "store")]) |
13749 | ||
;; clflush of the line at an address operand (constraint "p", printed | ||
;; with %a).  Modelled as unspec_volatile with "memory" set to "unknown". | ||
5802c0cb | 13750 | (define_insn "sse2_clflush" |
13751 | [(unspec_volatile [(match_operand 0 "address_operand" "p")] | |
13752 | UNSPECV_CLFLUSH)] | |
13753 | "TARGET_SSE2" | |
13754 | "clflush\t%a0" | |
13755 | [(set_attr "type" "sse") | |
fbfe006e | 13756 | (set_attr "atom_sse_attr" "fence") |
5802c0cb | 13757 | (set_attr "memory" "unknown")]) |
13758 | ||
ff6e6cb6 | 13759 | ;; As per AMD and Intel ISA manuals, the first operand is extensions |
13760 | ;; and it goes to %ecx. The second operand received is hints and it goes | |
13761 | ;; to %eax. | |
;; Both operands are implicit: the template prints only the mnemonic and | ||
;; the register constraints ("c" and "a") place the values.  The insn | ||
;; length is given explicitly as 3. | ||
5802c0cb | 13762 | (define_insn "sse3_mwait" |
ff6e6cb6 | 13763 | [(unspec_volatile [(match_operand:SI 0 "register_operand" "c") |
13764 | (match_operand:SI 1 "register_operand" "a")] | |
5802c0cb | 13765 | UNSPECV_MWAIT)] |
13766 | "TARGET_SSE3" | |
106eecb3 | 13767 | ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used. |
13768 | ;; Since 32bit register operands are implicitly zero extended to 64bit, | |
13769 | ;; we only need to set up 32bit registers. | |
13770 | "mwait" | |
5802c0cb | 13771 | [(set_attr "length" "3")]) |
13772 | ||
;; monitor: operand 0 is a pointer-mode (P iterator) register pinned to
;; "a"; operands 1 and 2 are SImode registers pinned to "c" and "d".
;; The length is 3 bytes plus one extra byte when Pmode != word_mode;
;; presumably that byte is the prefix printed by "%^" -- confirm
;; against the operand printer.
4a2a161f | 13773 | (define_insn "sse3_monitor_<mode>" |
bf0a02ba | 13774 | [(unspec_volatile [(match_operand:P 0 "register_operand" "a") |
106eecb3 | 13775 | (match_operand:SI 1 "register_operand" "c") |
13776 | (match_operand:SI 2 "register_operand" "d")] | |
13777 | UNSPECV_MONITOR)] | |
4a2a161f | 13778 | "TARGET_SSE3" |
106eecb3 | 13779 | ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in |
13780 | ;; RCX and RDX are used. Since 32bit register operands are implicitly | |
13781 | ;; zero extended to 64bit, we only need to set up 32bit registers. | |
4a2a161f | 13782 | "%^monitor" |
13783 | [(set (attr "length") | |
13784 | (symbol_ref ("(Pmode != word_mode) + 3")))]) | |
2b4894c5 | 13785 | |
3da2a73c | 13786 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
13787 | ;; | |
13788 | ;; SSSE3 instructions | |
13789 | ;; | |
13790 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
13791 | ||
;; Code iterator for the horizontal word add/subtract patterns below:
;; plain and signed-saturating forms of both plus and minus.
fd65bafc | 13792 | (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus]) |
2b4894c5 | 13793 | |
;; 256-bit horizontal add/subtract of adjacent 16-bit elements, one insn
;; per code in ssse3_plusminus.
;; The AVX2 instruction operates independently on each 128-bit lane, so
;; the result layout is:
;;   elements  0-3  : pairs taken from operand 1 elements 0-7
;;   elements  4-7  : pairs taken from operand 2 elements 0-7
;;   elements  8-11 : pairs taken from operand 1 elements 8-15
;;   elements 12-15 : pairs taken from operand 2 elements 8-15
;; The RTL follows this in-lane order (rather than "all of operand 1,
;; then all of operand 2") so that any RTL-level evaluation of the
;; pattern agrees with what the hardware computes.
fd65bafc | 13794 | (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3" |
5deb404d | 13795 | [(set (match_operand:V16HI 0 "register_operand" "=x") |
13796 | (vec_concat:V16HI | |
13797 | (vec_concat:V8HI | |
;; Low lane, result elements 0-3: operand 1 elements 0-7.
13798 | (vec_concat:V4HI | |
13799 | (vec_concat:V2HI | |
fd65bafc | 13800 | (ssse3_plusminus:HI |
5deb404d | 13801 | (vec_select:HI |
13802 | (match_operand:V16HI 1 "register_operand" "x") | |
13803 | (parallel [(const_int 0)])) | |
13804 | (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) | |
fd65bafc | 13805 | (ssse3_plusminus:HI |
5deb404d | 13806 | (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) |
13807 | (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) | |
13808 | (vec_concat:V2HI | |
fd65bafc | 13809 | (ssse3_plusminus:HI |
5deb404d | 13810 | (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) |
13811 | (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) | |
fd65bafc | 13812 | (ssse3_plusminus:HI |
5deb404d | 13813 | (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) |
13814 | (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) | |
;; Low lane, result elements 4-7: operand 2 elements 0-7.
13815 | (vec_concat:V4HI | |
13816 | (vec_concat:V2HI | |
fd65bafc | 13817 | (ssse3_plusminus:HI |
5deb404d | 13818 | (vec_select:HI (match_operand:V16HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0)])) |
13819 | (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) | |
fd65bafc | 13820 | (ssse3_plusminus:HI |
5deb404d | 13821 | (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
13822 | (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) | |
13823 | (vec_concat:V2HI | |
fd65bafc | 13824 | (ssse3_plusminus:HI |
5deb404d | 13825 | (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) |
13826 | (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) | |
fd65bafc | 13827 | (ssse3_plusminus:HI |
5deb404d | 13828 | (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
13829 | (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))) | |
13830 | (vec_concat:V8HI | |
;; High lane, result elements 8-11: operand 1 elements 8-15.
13831 | (vec_concat:V4HI | |
13832 | (vec_concat:V2HI | |
fd65bafc | 13833 | (ssse3_plusminus:HI |
5deb404d | 13834 | (vec_select:HI |
13835 | (match_dup 1) | |
13836 | (parallel [(const_int 8)])) | |
13837 | (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) | |
fd65bafc | 13838 | (ssse3_plusminus:HI |
5deb404d | 13839 | (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) |
13840 | (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) | |
13841 | (vec_concat:V2HI | |
fd65bafc | 13842 | (ssse3_plusminus:HI |
5deb404d | 13843 | (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) |
13844 | (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) | |
fd65bafc | 13845 | (ssse3_plusminus:HI |
5deb404d | 13846 | (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) |
13847 | (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))) | |
;; High lane, result elements 12-15: operand 2 elements 8-15.
13848 | (vec_concat:V4HI | |
13849 | (vec_concat:V2HI | |
fd65bafc | 13850 | (ssse3_plusminus:HI |
5deb404d | 13851 | (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) |
13852 | (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) | |
fd65bafc | 13853 | (ssse3_plusminus:HI |
5deb404d | 13854 | (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) |
13855 | (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) | |
13856 | (vec_concat:V2HI | |
fd65bafc | 13857 | (ssse3_plusminus:HI |
5deb404d | 13858 | (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) |
13859 | (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) | |
fd65bafc | 13860 | (ssse3_plusminus:HI |
5deb404d | 13861 | (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) |
13862 | (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] | |
13863 | "TARGET_AVX2" | |
fd65bafc | 13864 | "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}" |
5deb404d | 13865 | [(set_attr "type" "sseiadd") |
13866 | (set_attr "prefix_extra" "1") | |
13867 | (set_attr "prefix" "vex") | |
13868 | (set_attr "mode" "OI")]) | |
13869 | ||
;; 128-bit horizontal add/subtract of adjacent 16-bit elements.
;; Result elements 0-3 combine the pairs (0,1) (2,3) (4,5) (6,7) of
;; operand 1; result elements 4-7 combine the same pairs of operand 2.
;; Alternative 0 ("noavx") ties operand 1 to the destination and emits
;; the two-operand form; alternative 1 ("avx") emits the three-operand
;; "v" form.
fd65bafc | 13870 | (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3" |
908dc1fc | 13871 | [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
2b4894c5 | 13872 | (vec_concat:V8HI |
13873 | (vec_concat:V4HI | |
13874 | (vec_concat:V2HI | |
fd65bafc | 13875 | (ssse3_plusminus:HI |
2b4894c5 | 13876 | (vec_select:HI |
908dc1fc | 13877 | (match_operand:V8HI 1 "register_operand" "0,x") |
2b4894c5 | 13878 | (parallel [(const_int 0)])) |
13879 | (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) | |
fd65bafc | 13880 | (ssse3_plusminus:HI |
2b4894c5 | 13881 | (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) |
13882 | (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) | |
13883 | (vec_concat:V2HI | |
fd65bafc | 13884 | (ssse3_plusminus:HI |
2b4894c5 | 13885 | (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) |
13886 | (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) | |
fd65bafc | 13887 | (ssse3_plusminus:HI |
2b4894c5 | 13888 | (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) |
13889 | (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) | |
13890 | (vec_concat:V4HI | |
13891 | (vec_concat:V2HI | |
fd65bafc | 13892 | (ssse3_plusminus:HI |
2b4894c5 | 13893 | (vec_select:HI |
908dc1fc | 13894 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") |
2b4894c5 | 13895 | (parallel [(const_int 0)])) |
13896 | (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) | |
fd65bafc | 13897 | (ssse3_plusminus:HI |
2b4894c5 | 13898 | (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
13899 | (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) | |
13900 | (vec_concat:V2HI | |
fd65bafc | 13901 | (ssse3_plusminus:HI |
2b4894c5 | 13902 | (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) |
13903 | (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) | |
fd65bafc | 13904 | (ssse3_plusminus:HI |
2b4894c5 | 13905 | (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
13906 | (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | |
13907 | "TARGET_SSSE3" | |
908dc1fc | 13908 | "@ |
fd65bafc | 13909 | ph<plusminus_mnemonic>w\t{%2, %0|%0, %2} |
13910 | vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}" | |
908dc1fc | 13911 | [(set_attr "isa" "noavx,avx") |
13912 | (set_attr "type" "sseiadd") | |
fbfe006e | 13913 | (set_attr "atom_unit" "complex") |
908dc1fc | 13914 | (set_attr "prefix_data16" "1,*") |
1f346cbc | 13915 | (set_attr "prefix_extra" "1") |
908dc1fc | 13916 | (set_attr "prefix" "orig,vex") |
2b4894c5 | 13917 | (set_attr "mode" "TI")]) |
13918 | ||
;; 64-bit horizontal add/subtract of adjacent 16-bit elements, using "y"
;; registers.  Result elements 0-1 combine the pairs (0,1) (2,3) of
;; operand 1; result elements 2-3 combine the same pairs of operand 2.
;; Operand 1 is tied to the destination (constraint "0").
fd65bafc | 13919 | (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3" |
2b4894c5 | 13920 | [(set (match_operand:V4HI 0 "register_operand" "=y") |
13921 | (vec_concat:V4HI | |
13922 | (vec_concat:V2HI | |
fd65bafc | 13923 | (ssse3_plusminus:HI |
2b4894c5 | 13924 | (vec_select:HI |
13925 | (match_operand:V4HI 1 "register_operand" "0") | |
13926 | (parallel [(const_int 0)])) | |
13927 | (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) | |
fd65bafc | 13928 | (ssse3_plusminus:HI |
2b4894c5 | 13929 | (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) |
13930 | (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) | |
13931 | (vec_concat:V2HI | |
fd65bafc | 13932 | (ssse3_plusminus:HI |
2b4894c5 | 13933 | (vec_select:HI |
13934 | (match_operand:V4HI 2 "nonimmediate_operand" "ym") | |
13935 | (parallel [(const_int 0)])) | |
13936 | (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) | |
fd65bafc | 13937 | (ssse3_plusminus:HI |
2b4894c5 | 13938 | (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
13939 | (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] | |
13940 | "TARGET_SSSE3" | |
fd65bafc | 13941 | "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}" |
2b4894c5 | 13942 | [(set_attr "type" "sseiadd") |
fbfe006e | 13943 | (set_attr "atom_unit" "complex") |
1f346cbc | 13944 | (set_attr "prefix_extra" "1") |
00a0e418 | 13945 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
2b4894c5 | 13946 | (set_attr "mode" "DI")]) |
13947 | ||
;; 256-bit horizontal add/subtract of adjacent 32-bit elements, one insn
;; per code in plusminus.
;; The AVX2 instruction operates independently on each 128-bit lane, so
;; the result layout is:
;;   elements 0-1 : pairs taken from operand 1 elements 0-3
;;   elements 2-3 : pairs taken from operand 2 elements 0-3
;;   elements 4-5 : pairs taken from operand 1 elements 4-7
;;   elements 6-7 : pairs taken from operand 2 elements 4-7
;; The RTL follows this in-lane order (rather than "all of operand 1,
;; then all of operand 2") so that any RTL-level evaluation of the
;; pattern agrees with what the hardware computes.
fd65bafc | 13948 | (define_insn "avx2_ph<plusminus_mnemonic>dv8si3" |
5deb404d | 13949 | [(set (match_operand:V8SI 0 "register_operand" "=x") |
13950 | (vec_concat:V8SI | |
13951 | (vec_concat:V4SI | |
;; Low lane, result elements 0-1: operand 1 elements 0-3.
13952 | (vec_concat:V2SI | |
fd65bafc | 13953 | (plusminus:SI |
5deb404d | 13954 | (vec_select:SI |
13955 | (match_operand:V8SI 1 "register_operand" "x") | |
13956 | (parallel [(const_int 0)])) | |
13957 | (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) | |
fd65bafc | 13958 | (plusminus:SI |
5deb404d | 13959 | (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) |
13960 | (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) | |
;; Low lane, result elements 2-3: operand 2 elements 0-3.
13961 | (vec_concat:V2SI | |
fd65bafc | 13962 | (plusminus:SI |
5deb404d | 13963 | (vec_select:SI (match_operand:V8SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0)])) |
13964 | (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) | |
fd65bafc | 13965 | (plusminus:SI |
5deb404d | 13966 | (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) |
13967 | (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))) | |
13968 | (vec_concat:V4SI | |
;; High lane, result elements 4-5: operand 1 elements 4-7.
13969 | (vec_concat:V2SI | |
fd65bafc | 13970 | (plusminus:SI |
5deb404d | 13971 | (vec_select:SI |
13972 | (match_dup 1) | |
13973 | (parallel [(const_int 4)])) | |
13974 | (vec_select:SI (match_dup 1) (parallel [(const_int 5)]))) | |
fd65bafc | 13975 | (plusminus:SI |
5deb404d | 13976 | (vec_select:SI (match_dup 1) (parallel [(const_int 6)])) |
13977 | (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))) | |
;; High lane, result elements 6-7: operand 2 elements 4-7.
13978 | (vec_concat:V2SI | |
fd65bafc | 13979 | (plusminus:SI |
5deb404d | 13980 | (vec_select:SI (match_dup 2) (parallel [(const_int 4)])) |
13981 | (vec_select:SI (match_dup 2) (parallel [(const_int 5)]))) | |
fd65bafc | 13982 | (plusminus:SI |
5deb404d | 13983 | (vec_select:SI (match_dup 2) (parallel [(const_int 6)])) |
13984 | (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))] | |
13985 | "TARGET_AVX2" | |
fd65bafc | 13986 | "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}" |
5deb404d | 13987 | [(set_attr "type" "sseiadd") |
13988 | (set_attr "prefix_extra" "1") | |
13989 | (set_attr "prefix" "vex") | |
13990 | (set_attr "mode" "OI")]) | |
13991 | ||
;; 128-bit horizontal add/subtract of adjacent 32-bit elements.
;; Result elements 0-1 combine the pairs (0,1) (2,3) of operand 1;
;; result elements 2-3 combine the same pairs of operand 2.
;; Alternative 0 ("noavx") ties operand 1 to the destination;
;; alternative 1 ("avx") is the three-operand "v" form.
fd65bafc | 13992 | (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3" |
908dc1fc | 13993 | [(set (match_operand:V4SI 0 "register_operand" "=x,x") |
2b4894c5 | 13994 | (vec_concat:V4SI |
13995 | (vec_concat:V2SI | |
fd65bafc | 13996 | (plusminus:SI |
2b4894c5 | 13997 | (vec_select:SI |
908dc1fc | 13998 | (match_operand:V4SI 1 "register_operand" "0,x") |
2b4894c5 | 13999 | (parallel [(const_int 0)])) |
14000 | (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) | |
fd65bafc | 14001 | (plusminus:SI |
2b4894c5 | 14002 | (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) |
14003 | (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) | |
14004 | (vec_concat:V2SI | |
fd65bafc | 14005 | (plusminus:SI |
2b4894c5 | 14006 | (vec_select:SI |
908dc1fc | 14007 | (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") |
2b4894c5 | 14008 | (parallel [(const_int 0)])) |
14009 | (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) | |
fd65bafc | 14010 | (plusminus:SI |
2b4894c5 | 14011 | (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) |
14012 | (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] | |
14013 | "TARGET_SSSE3" | |
908dc1fc | 14014 | "@ |
fd65bafc | 14015 | ph<plusminus_mnemonic>d\t{%2, %0|%0, %2} |
14016 | vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}" | |
908dc1fc | 14017 | [(set_attr "isa" "noavx,avx") |
14018 | (set_attr "type" "sseiadd") | |
fbfe006e | 14019 | (set_attr "atom_unit" "complex") |
908dc1fc | 14020 | (set_attr "prefix_data16" "1,*") |
1f346cbc | 14021 | (set_attr "prefix_extra" "1") |
908dc1fc | 14022 | (set_attr "prefix" "orig,vex") |
2b4894c5 | 14023 | (set_attr "mode" "TI")]) |
14024 | ||
;; 64-bit horizontal add/subtract of adjacent 32-bit elements, using "y"
;; registers.  Result element 0 combines elements 0 and 1 of operand 1;
;; result element 1 combines elements 0 and 1 of operand 2.
;; Operand 1 is tied to the destination (constraint "0").
fd65bafc | 14025 | (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3" |
2b4894c5 | 14026 | [(set (match_operand:V2SI 0 "register_operand" "=y") |
14027 | (vec_concat:V2SI | |
fd65bafc | 14028 | (plusminus:SI |
2b4894c5 | 14029 | (vec_select:SI |
14030 | (match_operand:V2SI 1 "register_operand" "0") | |
14031 | (parallel [(const_int 0)])) | |
14032 | (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) | |
fd65bafc | 14033 | (plusminus:SI |
2b4894c5 | 14034 | (vec_select:SI |
14035 | (match_operand:V2SI 2 "nonimmediate_operand" "ym") | |
14036 | (parallel [(const_int 0)])) | |
14037 | (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] | |
14038 | "TARGET_SSSE3" | |
fd65bafc | 14039 | "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}" |
2b4894c5 | 14040 | [(set_attr "type" "sseiadd") |
fbfe006e | 14041 | (set_attr "atom_unit" "complex") |
ed30e0a6 | 14042 | (set_attr "prefix_extra" "1") |
00a0e418 | 14043 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
ed30e0a6 | 14044 | (set_attr "mode" "DI")]) |
14045 | ||
;; 256-bit multiply-and-add of bytes to words.
;; The even-indexed bytes of operand 1 are zero-extended and multiplied
;; by the sign-extended even-indexed bytes of operand 2; the same is done
;; for the odd-indexed bytes.  The two V16HI products are then added with
;; signed saturation (ss_plus).
5deb404d | 14046 | (define_insn "avx2_pmaddubsw256" |
14047 | [(set (match_operand:V16HI 0 "register_operand" "=x") | |
14048 | (ss_plus:V16HI | |
14049 | (mult:V16HI | |
14050 | (zero_extend:V16HI | |
14051 | (vec_select:V16QI | |
14052 | (match_operand:V32QI 1 "register_operand" "x") | |
04d95c72 | 14053 | (parallel [(const_int 0) (const_int 2) |
14054 | (const_int 4) (const_int 6) | |
14055 | (const_int 8) (const_int 10) | |
14056 | (const_int 12) (const_int 14) | |
14057 | (const_int 16) (const_int 18) | |
14058 | (const_int 20) (const_int 22) | |
14059 | (const_int 24) (const_int 26) | |
14060 | (const_int 28) (const_int 30)]))) | |
5deb404d | 14061 | (sign_extend:V16HI |
14062 | (vec_select:V16QI | |
14063 | (match_operand:V32QI 2 "nonimmediate_operand" "xm") | |
04d95c72 | 14064 | (parallel [(const_int 0) (const_int 2) |
14065 | (const_int 4) (const_int 6) | |
14066 | (const_int 8) (const_int 10) | |
14067 | (const_int 12) (const_int 14) | |
14068 | (const_int 16) (const_int 18) | |
14069 | (const_int 20) (const_int 22) | |
14070 | (const_int 24) (const_int 26) | |
14071 | (const_int 28) (const_int 30)])))) | |
5deb404d | 14072 | (mult:V16HI |
14073 | (zero_extend:V16HI | |
14074 | (vec_select:V16QI (match_dup 1) | |
04d95c72 | 14075 | (parallel [(const_int 1) (const_int 3) |
14076 | (const_int 5) (const_int 7) | |
14077 | (const_int 9) (const_int 11) | |
14078 | (const_int 13) (const_int 15) | |
14079 | (const_int 17) (const_int 19) | |
14080 | (const_int 21) (const_int 23) | |
14081 | (const_int 25) (const_int 27) | |
14082 | (const_int 29) (const_int 31)]))) | |
5deb404d | 14083 | (sign_extend:V16HI |
14084 | (vec_select:V16QI (match_dup 2) | |
04d95c72 | 14085 | (parallel [(const_int 1) (const_int 3) |
14086 | (const_int 5) (const_int 7) | |
14087 | (const_int 9) (const_int 11) | |
14088 | (const_int 13) (const_int 15) | |
14089 | (const_int 17) (const_int 19) | |
14090 | (const_int 21) (const_int 23) | |
14091 | (const_int 25) (const_int 27) | |
14092 | (const_int 29) (const_int 31)]))))))] | |
5deb404d | 14093 | "TARGET_AVX2" |
14094 | "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" | |
14095 | [(set_attr "type" "sseiadd") | |
14096 | (set_attr "prefix_extra" "1") | |
14097 | (set_attr "prefix" "vex") | |
14098 | (set_attr "mode" "OI")]) | |
14099 | ||
;; AVX-512BW vpmaddubsw over the VI2_AVX512VL modes, with optional
;; masking through the <mask_name> substitution.  The operation is kept
;; opaque as UNSPEC_PMADDUBSW512 of the two <dbpsadbwmode> inputs.
d58134c2 | 14100 | ;; The correct representation for this is absolutely enormous, and |
14101 | ;; surely not generally useful. | |
14102 | (define_insn "avx512bw_pmaddubsw512<mode><mask_name>" | |
14103 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
14104 | (unspec:VI2_AVX512VL | |
14105 | [(match_operand:<dbpsadbwmode> 1 "register_operand" "v") | |
14106 | (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")] | |
14107 | UNSPEC_PMADDUBSW512))] | |
14108 | "TARGET_AVX512BW" | |
14109 | "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
14110 | [(set_attr "type" "sseiadd") | |
14111 | (set_attr "prefix" "evex") | |
14112 | (set_attr "mode" "XI")]) | |
14113 | ||
;; 512-bit vpmulhrsw on V32HI, with optional masking through
;; <mask_name>.  Per element the RTL computes
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
;; using logical right shifts on the widened V32SI product.  Operand 1
;; is marked commutative ("%").
;; NOTE(review): the all-ones rounding constant is written in V32HI mode
;; although it is an operand of a V32SI plus -- confirm this is intended.
20144456 | 14114 | (define_insn "avx512bw_umulhrswv32hi3<mask_name>" |
14115 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
14116 | (truncate:V32HI | |
14117 | (lshiftrt:V32SI | |
14118 | (plus:V32SI | |
14119 | (lshiftrt:V32SI | |
14120 | (mult:V32SI | |
14121 | (sign_extend:V32SI | |
14122 | (match_operand:V32HI 1 "nonimmediate_operand" "%v")) | |
14123 | (sign_extend:V32SI | |
14124 | (match_operand:V32HI 2 "nonimmediate_operand" "vm"))) | |
14125 | (const_int 14)) | |
14126 | (const_vector:V32HI [(const_int 1) (const_int 1) | |
14127 | (const_int 1) (const_int 1) | |
14128 | (const_int 1) (const_int 1) | |
14129 | (const_int 1) (const_int 1) | |
14130 | (const_int 1) (const_int 1) | |
14131 | (const_int 1) (const_int 1) | |
14132 | (const_int 1) (const_int 1) | |
14133 | (const_int 1) (const_int 1) | |
14134 | (const_int 1) (const_int 1) | |
14135 | (const_int 1) (const_int 1) | |
14136 | (const_int 1) (const_int 1) | |
14137 | (const_int 1) (const_int 1) | |
14138 | (const_int 1) (const_int 1) | |
14139 | (const_int 1) (const_int 1) | |
14140 | (const_int 1) (const_int 1) | |
14141 | (const_int 1) (const_int 1)])) | |
14142 | (const_int 1))))] | |
14143 | "TARGET_AVX512BW" | |
14144 | "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
14145 | [(set_attr "type" "sseimul") | |
14146 | (set_attr "prefix" "evex") | |
14147 | (set_attr "mode" "XI")]) | |
14148 | ||
;; 128-bit multiply-and-add of bytes to words.
;; The even-indexed bytes of operand 1 are zero-extended and multiplied
;; by the sign-extended even-indexed bytes of operand 2; the same is done
;; for the odd-indexed bytes.  The two V8HI products are then added with
;; signed saturation (ss_plus).
;; Alternative 0 ("noavx") ties operand 1 to the destination;
;; alternative 1 ("avx") is the three-operand "v" form.
6f50184d | 14149 | (define_insn "ssse3_pmaddubsw128" |
908dc1fc | 14150 | [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
2b4894c5 | 14151 | (ss_plus:V8HI |
14152 | (mult:V8HI | |
14153 | (zero_extend:V8HI | |
1fda60c6 | 14154 | (vec_select:V8QI |
908dc1fc | 14155 | (match_operand:V16QI 1 "register_operand" "0,x") |
04d95c72 | 14156 | (parallel [(const_int 0) (const_int 2) |
14157 | (const_int 4) (const_int 6) | |
14158 | (const_int 8) (const_int 10) | |
14159 | (const_int 12) (const_int 14)]))) | |
2b4894c5 | 14160 | (sign_extend:V8HI |
14161 | (vec_select:V8QI | |
908dc1fc | 14162 | (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") |
04d95c72 | 14163 | (parallel [(const_int 0) (const_int 2) |
14164 | (const_int 4) (const_int 6) | |
14165 | (const_int 8) (const_int 10) | |
14166 | (const_int 12) (const_int 14)])))) | |
2b4894c5 | 14167 | (mult:V8HI |
14168 | (zero_extend:V8HI | |
1fda60c6 | 14169 | (vec_select:V8QI (match_dup 1) |
04d95c72 | 14170 | (parallel [(const_int 1) (const_int 3) |
14171 | (const_int 5) (const_int 7) | |
14172 | (const_int 9) (const_int 11) | |
14173 | (const_int 13) (const_int 15)]))) | |
2b4894c5 | 14174 | (sign_extend:V8HI |
1fda60c6 | 14175 | (vec_select:V8QI (match_dup 2) |
04d95c72 | 14176 | (parallel [(const_int 1) (const_int 3) |
14177 | (const_int 5) (const_int 7) | |
14178 | (const_int 9) (const_int 11) | |
14179 | (const_int 13) (const_int 15)]))))))] | |
2b4894c5 | 14180 | "TARGET_SSSE3" |
908dc1fc | 14181 | "@ |
14182 | pmaddubsw\t{%2, %0|%0, %2} | |
14183 | vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" | |
14184 | [(set_attr "isa" "noavx,avx") | |
14185 | (set_attr "type" "sseiadd") | |
fbfe006e | 14186 | (set_attr "atom_unit" "simul") |
908dc1fc | 14187 | (set_attr "prefix_data16" "1,*") |
1f346cbc | 14188 | (set_attr "prefix_extra" "1") |
908dc1fc | 14189 | (set_attr "prefix" "orig,vex") |
2b4894c5 | 14190 | (set_attr "mode" "TI")]) |
14191 | ||
;; 64-bit multiply-and-add of bytes to words, using "y" registers.
;; Even-indexed bytes of operand 1 (zero-extended) are multiplied by the
;; even-indexed bytes of operand 2 (sign-extended), likewise for the
;; odd-indexed bytes, and the two V4HI products are added with signed
;; saturation.  Operand 1 is tied to the destination (constraint "0").
6f50184d | 14192 | (define_insn "ssse3_pmaddubsw" |
2b4894c5 | 14193 | [(set (match_operand:V4HI 0 "register_operand" "=y") |
14194 | (ss_plus:V4HI | |
14195 | (mult:V4HI | |
14196 | (zero_extend:V4HI | |
14197 | (vec_select:V4QI | |
7c839b3f | 14198 | (match_operand:V8QI 1 "register_operand" "0") |
04d95c72 | 14199 | (parallel [(const_int 0) (const_int 2) |
14200 | (const_int 4) (const_int 6)]))) | |
2b4894c5 | 14201 | (sign_extend:V4HI |
14202 | (vec_select:V4QI | |
14203 | (match_operand:V8QI 2 "nonimmediate_operand" "ym") | |
04d95c72 | 14204 | (parallel [(const_int 0) (const_int 2) |
14205 | (const_int 4) (const_int 6)])))) | |
2b4894c5 | 14206 | (mult:V4HI |
14207 | (zero_extend:V4HI | |
1fda60c6 | 14208 | (vec_select:V4QI (match_dup 1) |
04d95c72 | 14209 | (parallel [(const_int 1) (const_int 3) |
14210 | (const_int 5) (const_int 7)]))) | |
2b4894c5 | 14211 | (sign_extend:V4HI |
1fda60c6 | 14212 | (vec_select:V4QI (match_dup 2) |
04d95c72 | 14213 | (parallel [(const_int 1) (const_int 3) |
14214 | (const_int 5) (const_int 7)]))))))] | |
2b4894c5 | 14215 | "TARGET_SSSE3" |
14216 | "pmaddubsw\t{%2, %0|%0, %2}" | |
14217 | [(set_attr "type" "sseiadd") | |
fbfe006e | 14218 | (set_attr "atom_unit" "simul") |
1f346cbc | 14219 | (set_attr "prefix_extra" "1") |
5deb404d | 14220 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
14221 | (set_attr "mode" "DI")]) | |
14222 | ||
;; Modes handled by the pmulhrsw expanders below: V4HI and V8HI
;; unconditionally, V16HI only when TARGET_AVX2.
e4048f11 | 14223 | (define_mode_iterator PMULHRSW |
14224 | [V4HI V8HI (V16HI "TARGET_AVX2")]) | |
14225 | ||
;; Masked pmulhrsw expander.  Builds a vec_merge of the rounded
;; high-half multiply
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + op5) >> 1)
;; with the pass-through value in operand 3 under the mask in operand 4.
;; Operand 5 is not supplied by the caller: the preparation code sets it
;; to CONST1_RTX of the vector mode, then lets
;; ix86_fixup_binary_operands_no_copy adjust operands 1 and 2.
20144456 | 14226 | (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask" |
14227 | [(set (match_operand:PMULHRSW 0 "register_operand") | |
14228 | (vec_merge:PMULHRSW | |
14229 | (truncate:PMULHRSW | |
14230 | (lshiftrt:<ssedoublemode> | |
14231 | (plus:<ssedoublemode> | |
14232 | (lshiftrt:<ssedoublemode> | |
14233 | (mult:<ssedoublemode> | |
14234 | (sign_extend:<ssedoublemode> | |
14235 | (match_operand:PMULHRSW 1 "nonimmediate_operand")) | |
14236 | (sign_extend:<ssedoublemode> | |
14237 | (match_operand:PMULHRSW 2 "nonimmediate_operand"))) | |
14238 | (const_int 14)) | |
14239 | (match_dup 5)) | |
14240 | (const_int 1))) | |
14241 | (match_operand:PMULHRSW 3 "register_operand") | |
14242 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
14243 | "TARGET_AVX512BW && TARGET_AVX512VL" | |
14244 | { | |
14245 | operands[5] = CONST1_RTX(<MODE>mode); | |
14246 | ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); | |
14247 | }) | |
14248 | ||
;; Unmasked pmulhrsw expander for the PMULHRSW modes.  Builds
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + op3) >> 1)
;; where operand 3 is set by the preparation code to CONST1_RTX of the
;; vector mode; ix86_fixup_binary_operands_no_copy then adjusts
;; operands 1 and 2.
;; The condition is TARGET_SSSE3, matching the insns that recognize this
;; RTL; the V16HI iteration is additionally gated on TARGET_AVX2 by the
;; PMULHRSW iterator itself, so requiring AVX2 here would wrongly
;; restrict the V4HI and V8HI forms.
e4048f11 | 14249 | (define_expand "<ssse3_avx2>_pmulhrsw<mode>3" |
14250 | [(set (match_operand:PMULHRSW 0 "register_operand") | |
14251 | (truncate:PMULHRSW | |
14252 | (lshiftrt:<ssedoublemode> | |
14253 | (plus:<ssedoublemode> | |
14254 | (lshiftrt:<ssedoublemode> | |
14255 | (mult:<ssedoublemode> | |
14256 | (sign_extend:<ssedoublemode> | |
14257 | (match_operand:PMULHRSW 1 "nonimmediate_operand")) | |
14258 | (sign_extend:<ssedoublemode> | |
14259 | (match_operand:PMULHRSW 2 "nonimmediate_operand"))) | |
5deb404d | 14260 | (const_int 14)) |
e4048f11 | 14261 | (match_dup 3)) |
5deb404d | 14262 | (const_int 1))))] |
14263 | "TARGET_SSSE3" | |
e4048f11 | 14264 | { |
14265 | operands[3] = CONST1_RTX(<MODE>mode); | |
14266 | ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); | |
14267 | }) | |
5deb404d | 14268 | |
;; Insn matching the rounded high-half multiply over the VI2_AVX2 modes,
;; with optional masking through <mask_name>.  Operand 3 must satisfy
;; const1_operand (the rounding addend).  Operand 1 is commutative ("%").
;; Alternative 0 ("noavx") ties operand 1 to the destination and uses "x"
;; registers; alternative 1 ("avx") is the three-operand form on "v"
;; registers with prefix "maybe_evex".
2d71b728 | 14269 | (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>" |
14270 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") | |
e8610aac | 14271 | (truncate:VI2_AVX2 |
14272 | (lshiftrt:<ssedoublemode> | |
14273 | (plus:<ssedoublemode> | |
14274 | (lshiftrt:<ssedoublemode> | |
14275 | (mult:<ssedoublemode> | |
14276 | (sign_extend:<ssedoublemode> | |
2d71b728 | 14277 | (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")) |
e8610aac | 14278 | (sign_extend:<ssedoublemode> |
2d71b728 | 14279 | (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm"))) |
2b4894c5 | 14280 | (const_int 14)) |
e8610aac | 14281 | (match_operand:VI2_AVX2 3 "const1_operand")) |
2b4894c5 | 14282 | (const_int 1))))] |
2d71b728 | 14283 | "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition> |
14284 | && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" | |
908dc1fc | 14285 | "@ |
14286 | pmulhrsw\t{%2, %0|%0, %2} | |
2d71b728 | 14287 | vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}" |
908dc1fc | 14288 | [(set_attr "isa" "noavx,avx") |
14289 | (set_attr "type" "sseimul") | |
14290 | (set_attr "prefix_data16" "1,*") | |
1f346cbc | 14291 | (set_attr "prefix_extra" "1") |
2d71b728 | 14292 | (set_attr "prefix" "orig,maybe_evex") |
e8610aac | 14293 | (set_attr "mode" "<sseinsnmode>")]) |
2b4894c5 | 14294 | |
;; 64-bit ("y" register) form of the rounded high-half multiply:
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + op3) >> 1)
;; with operand 3 required to satisfy const1_operand.  Operand 1 is
;; commutative ("%") and tied to the destination.
7c839b3f | 14295 | (define_insn "*ssse3_pmulhrswv4hi3" |
2b4894c5 | 14296 | [(set (match_operand:V4HI 0 "register_operand" "=y") |
14297 | (truncate:V4HI | |
14298 | (lshiftrt:V4SI | |
14299 | (plus:V4SI | |
14300 | (lshiftrt:V4SI | |
14301 | (mult:V4SI | |
14302 | (sign_extend:V4SI | |
14303 | (match_operand:V4HI 1 "nonimmediate_operand" "%0")) | |
14304 | (sign_extend:V4SI | |
14305 | (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) | |
14306 | (const_int 14)) | |
e4048f11 | 14307 | (match_operand:V4HI 3 "const1_operand")) |
2b4894c5 | 14308 | (const_int 1))))] |
14309 | "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" | |
14310 | "pmulhrsw\t{%2, %0|%0, %2}" | |
14311 | [(set_attr "type" "sseimul") | |
1f346cbc | 14312 | (set_attr "prefix_extra" "1") |
00a0e418 | 14313 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
2b4894c5 | 14314 | (set_attr "mode" "DI")]) |
14315 | ||
;; pshufb over the VI1_AVX512 modes, with optional masking through
;; <mask_name>.  The shuffle is kept opaque as UNSPEC_PSHUFB of
;; operands 1 and 2.
;; Alternative 0 ("noavx") ties operand 1 to the destination;
;; alternative 1 ("avx") is the three-operand form on "v" registers with
;; prefix "maybe_evex".
201f262d | 14316 | (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>" |
14317 | [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v") | |
14318 | (unspec:VI1_AVX512 | |
14319 | [(match_operand:VI1_AVX512 1 "register_operand" "0,v") | |
14320 | (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")] | |
fd65bafc | 14321 | UNSPEC_PSHUFB))] |
201f262d | 14322 | "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
908dc1fc | 14323 | "@ |
14324 | pshufb\t{%2, %0|%0, %2} | |
201f262d | 14325 | vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
908dc1fc | 14326 | [(set_attr "isa" "noavx,avx") |
14327 | (set_attr "type" "sselog1") | |
14328 | (set_attr "prefix_data16" "1,*") | |
1f346cbc | 14329 | (set_attr "prefix_extra" "1") |
201f262d | 14330 | (set_attr "prefix" "orig,maybe_evex") |
6470d004 | 14331 | (set_attr "btver2_decode" "vector,vector") |
5deb404d | 14332 | (set_attr "mode" "<sseinsnmode>")]) |
2b4894c5 | 14333 | |
;; 64-bit ("y" register) pshufb on V8QI.  The shuffle is kept opaque as
;; UNSPEC_PSHUFB of operands 1 and 2; operand 1 is tied to the
;; destination (constraint "0").
14334 | (define_insn "ssse3_pshufbv8qi3" | |
14335 | [(set (match_operand:V8QI 0 "register_operand" "=y") | |
14336 | (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") | |
14337 | (match_operand:V8QI 2 "nonimmediate_operand" "ym")] | |
2a466fea | 14338 | UNSPEC_PSHUFB))] |
2b4894c5 | 14339 | "TARGET_SSSE3" |
14340 | "pshufb\t{%2, %0|%0, %2}" | |
14341 | [(set_attr "type" "sselog1") | |
1f346cbc | 14342 | (set_attr "prefix_extra" "1") |
00a0e418 | 14343 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
2b4894c5 | 14344 | (set_attr "mode" "DI")]) |
14345 | ||
;; psign over the VI124_AVX2 modes; the element-size suffix comes from
;; <ssemodesuffix>.  The operation is kept opaque as UNSPEC_PSIGN of
;; operands 1 and 2.
;; Alternative 0 ("noavx") ties operand 1 to the destination;
;; alternative 1 ("avx") is the three-operand "v" form.
5deb404d | 14346 | (define_insn "<ssse3_avx2>_psign<mode>3" |
14347 | [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x") | |
14348 | (unspec:VI124_AVX2 | |
14349 | [(match_operand:VI124_AVX2 1 "register_operand" "0,x") | |
14350 | (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")] | |
2a466fea | 14351 | UNSPEC_PSIGN))] |
2b4894c5 | 14352 | "TARGET_SSSE3" |
908dc1fc | 14353 | "@ |
63d5e521 | 14354 | psign<ssemodesuffix>\t{%2, %0|%0, %2} |
14355 | vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
908dc1fc | 14356 | [(set_attr "isa" "noavx,avx") |
14357 | (set_attr "type" "sselog1") | |
14358 | (set_attr "prefix_data16" "1,*") | |
1f346cbc | 14359 | (set_attr "prefix_extra" "1") |
908dc1fc | 14360 | (set_attr "prefix" "orig,vex") |
5deb404d | 14361 | (set_attr "mode" "<sseinsnmode>")]) |
2b4894c5 | 14362 | |
;; 64-bit ("y" register) psign over the MMXMODEI modes; the element-size
;; suffix comes from <mmxvecsize>.  The operation is kept opaque as
;; UNSPEC_PSIGN of operands 1 and 2; operand 1 is tied to the
;; destination (constraint "0").
14363 | (define_insn "ssse3_psign<mode>3" | |
14364 | [(set (match_operand:MMXMODEI 0 "register_operand" "=y") | |
2a466fea | 14365 | (unspec:MMXMODEI |
14366 | [(match_operand:MMXMODEI 1 "register_operand" "0") | |
14367 | (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] | |
14368 | UNSPEC_PSIGN))] | |
2b4894c5 | 14369 | "TARGET_SSSE3" |
14370 | "psign<mmxvecsize>\t{%2, %0|%0, %2}" | |
14371 | [(set_attr "type" "sselog1") | |
1f346cbc | 14372 | (set_attr "prefix_extra" "1") |
00a0e418 | 14373 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
2b4894c5 | 14374 | (set_attr "mode" "DI")]) |
14375 | ||
;; Masked vpalignr over the VI1_AVX512 modes.  The UNSPEC_PALIGNR result
;; is merged with operand 4 (tied to the destination, or a constant
;; accepted by constraint "C") under the mask register in operand 5.
;; Operand 3 arrives as a multiple of 8 (const_0_to_255_mul_8_operand)
;; and is divided by 8 before being printed as the immediate.
;; "%N4" presumably prints the zero-masking suffix when operand 4 is the
;; zero constant -- confirm against the operand printer.
d49df830 | 14376 | (define_insn "<ssse3_avx2>_palignr<mode>_mask" |
dfd41e6d | 14377 | [(set (match_operand:VI1_AVX512 0 "register_operand" "=v") |
14378 | (vec_merge:VI1_AVX512 | |
14379 | (unspec:VI1_AVX512 | |
14380 | [(match_operand:VI1_AVX512 1 "register_operand" "v") | |
14381 | (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm") | |
d49df830 | 14382 | (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] |
14383 | UNSPEC_PALIGNR) | |
dfd41e6d | 14384 | (match_operand:VI1_AVX512 4 "vector_move_operand" "0C") |
d49df830 | 14385 | (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] |
14386 | "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)" | |
14387 | { | |
14388 | operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | |
14389 | return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}"; | |
14390 | } | |
14391 | [(set_attr "type" "sseishft") | |
14392 | (set_attr "atom_unit" "sishuf") | |
14393 | (set_attr "prefix_extra" "1") | |
14394 | (set_attr "length_immediate" "1") | |
14395 | (set_attr "prefix" "evex") | |
14396 | (set_attr "mode" "<sseinsnmode>")]) | |
14397 | ||
;; Unmasked palignr over the SSESCALARMODE modes, kept opaque as
;; UNSPEC_PALIGNR.  Operand 3 arrives as a multiple of 8
;; (const_0_to_255_mul_8_operand) and is divided by 8 before being
;; printed as the immediate.
;; Alternative 0 ("noavx") ties operand 1 to the destination and prints
;; the two-register form; alternative 1 ("avx") prints the
;; three-register "v" form.
;; NOTE(review): alternative 1 accepts "v" registers but is tagged
;; prefix "vex" -- confirm whether registers that need EVEX encoding can
;; be allocated here.
5deb404d | 14398 | (define_insn "<ssse3_avx2>_palignr<mode>" |
d49df830 | 14399 | [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v") |
fd65bafc | 14400 | (unspec:SSESCALARMODE |
d49df830 | 14401 | [(match_operand:SSESCALARMODE 1 "register_operand" "0,v") |
14402 | (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm") | |
fd65bafc | 14403 | (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] |
14404 | UNSPEC_PALIGNR))] | |
2b4894c5 | 14405 | "TARGET_SSSE3" |
14406 | { | |
14407 | operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | |
908dc1fc | 14408 | |
14409 | switch (which_alternative) | |
14410 | { | |
14411 | case 0: | |
14412 | return "palignr\t{%3, %2, %0|%0, %2, %3}"; | |
14413 | case 1: | |
14414 | return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | |
14415 | default: | |
14416 | gcc_unreachable (); | |
14417 | } | |
2b4894c5 | 14418 | } |
908dc1fc | 14419 | [(set_attr "isa" "noavx,avx") |
14420 | (set_attr "type" "sseishft") | |
fbfe006e | 14421 | (set_attr "atom_unit" "sishuf") |
908dc1fc | 14422 | (set_attr "prefix_data16" "1,*") |
1f346cbc | 14423 | (set_attr "prefix_extra" "1") |
00a0e418 | 14424 | (set_attr "length_immediate" "1") |
908dc1fc | 14425 | (set_attr "prefix" "orig,vex") |
5deb404d | 14426 | (set_attr "mode" "<sseinsnmode>")]) |
2b4894c5 | 14427 | |
;; 64-bit ("y" register) palignr on DImode, kept opaque as
;; UNSPEC_PALIGNR.  Operand 1 is tied to the destination.  Operand 3
;; arrives as a multiple of 8 (const_0_to_255_mul_8_operand) and is
;; divided by 8 before being printed as the immediate.
14428 | (define_insn "ssse3_palignrdi" | |
14429 | [(set (match_operand:DI 0 "register_operand" "=y") | |
14430 | (unspec:DI [(match_operand:DI 1 "register_operand" "0") | |
14431 | (match_operand:DI 2 "nonimmediate_operand" "ym") | |
14432 | (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] | |
2a466fea | 14433 | UNSPEC_PALIGNR))] |
2b4894c5 | 14434 | "TARGET_SSSE3" |
14435 | { | |
14436 | operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | |
14437 | return "palignr\t{%3, %2, %0|%0, %2, %3}"; | |
14438 | } | |
14439 | [(set_attr "type" "sseishft") | |
fbfe006e | 14440 | (set_attr "atom_unit" "sishuf") |
1f346cbc | 14441 | (set_attr "prefix_extra" "1") |
00a0e418 | 14442 | (set_attr "length_immediate" "1") |
14443 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
2b4894c5 | 14444 | (set_attr "mode" "DI")]) |
14445 | ||
e75eecf6 | 14446 | ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI |
14447 | ;; modes for abs instruction on pre AVX-512 targets. | |
;; V16QI, V8HI and V4SI are unconditional; the 256-bit QI/HI/SI modes need
;; AVX2, the 512-bit QI/HI modes need AVX512BW, V16SI and V8DI need
;; AVX512F, and V4DI/V2DI are only enabled with AVX512VL.
14448 | (define_mode_iterator VI1248_AVX512VL_AVX512BW | |
14449 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI | |
14450 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI | |
14451 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI | |
14452 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) | |
14453 | ||
;; Unnamed-for-expansion ("*") vector absolute value insn for every mode
;; of VI1248_AVX512VL_AVX512BW; emits pabs<ssemodesuffix> with the %v
;; template prefix and a "maybe_vex" prefix attribute.
12803fe0 | 14454 | (define_insn "*abs<mode>2" |
e75eecf6 | 14455 | [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v") |
14456 | (abs:VI1248_AVX512VL_AVX512BW | |
14457 | (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))] | |
12803fe0 | 14458 | "TARGET_SSSE3" |
14459 | "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}" | |
2b4894c5 | 14460 | [(set_attr "type" "sselog1") |
1f346cbc | 14461 | (set_attr "prefix_data16" "1") |
14462 | (set_attr "prefix_extra" "1") | |
ed30e0a6 | 14463 | (set_attr "prefix" "maybe_vex") |
5deb404d | 14464 | (set_attr "mode" "<sseinsnmode>")]) |
2b4894c5 | 14465 | |
;; Masked vector absolute value for the VI48_AVX512VL modes: a vec_merge
;; of abs (operand 1) with operand 2 (tied to the destination or a
;; "C" constant) under mask register operand 3.  %N2 is appended after
;; the {%3} mask in the output template.
12803fe0 | 14466 | (define_insn "abs<mode>2_mask" |
14467 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
14468 | (vec_merge:VI48_AVX512VL | |
14469 | (abs:VI48_AVX512VL | |
14470 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")) | |
14471 | (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C") | |
14472 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] | |
14473 | "TARGET_AVX512F" | |
14474 | "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
14475 | [(set_attr "type" "sselog1") | |
14476 | (set_attr "prefix" "evex") | |
14477 | (set_attr "mode" "<sseinsnmode>")]) | |
14478 | ||
;; Masked vector absolute value for the VI12_AVX512VL modes; same shape
;; as the VI48_AVX512VL variant but gated on TARGET_AVX512BW.  The two
;; patterns share the "abs<mode>2_mask" name template and are told apart
;; by the modes their iterators substitute for <mode>.
14479 | (define_insn "abs<mode>2_mask" | |
14480 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
14481 | (vec_merge:VI12_AVX512VL | |
14482 | (abs:VI12_AVX512VL | |
14483 | (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")) | |
14484 | (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C") | |
14485 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] | |
14486 | "TARGET_AVX512BW" | |
14487 | "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
14488 | [(set_attr "type" "sselog1") | |
14489 | (set_attr "prefix" "evex") | |
14490 | (set_attr "mode" "<sseinsnmode>")]) | |
14491 | ||
;; Expander for vector absolute value.  It is available from SSE2 on:
;; without SSSE3 the work is handed to ix86_expand_sse2_abs and expansion
;; finishes there (DONE); with SSSE3 the preparation code does nothing and
;; the abs RTL in the pattern is emitted as written.
95e3231d | 14492 | (define_expand "abs<mode>2" |
e75eecf6 | 14493 | [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand") |
14494 | (abs:VI1248_AVX512VL_AVX512BW | |
14495 | (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))] | |
95e3231d | 14496 | "TARGET_SSE2" |
14497 | { | |
14498 | if (!TARGET_SSSE3) | |
14499 | { | |
14500 | ix86_expand_sse2_abs (operands[0], operands[1]); | |
14501 | DONE; | |
14502 | } | |
14503 | }) | |
14504 | ||
;; Absolute value on the MMXMODEI modes using the "y" register class;
;; emits pabs<mmxvecsize>.  prefix_rex is computed from whether the insn
;; mentions an extended register.
2b4894c5 | 14505 | (define_insn "abs<mode>2" |
14506 | [(set (match_operand:MMXMODEI 0 "register_operand" "=y") | |
908dc1fc | 14507 | (abs:MMXMODEI |
14508 | (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] | |
2b4894c5 | 14509 | "TARGET_SSSE3" |
14510 | "pabs<mmxvecsize>\t{%1, %0|%0, %1}" | |
14511 | [(set_attr "type" "sselog1") | |
00a0e418 | 14512 | (set_attr "prefix_rep" "0") |
1f346cbc | 14513 | (set_attr "prefix_extra" "1") |
00a0e418 | 14514 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
2b4894c5 | 14515 | (set_attr "mode" "DI")]) |
3d775f8e | 14516 | |
14517 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
14518 | ;; | |
14519 | ;; AMD SSE4A instructions | |
14520 | ;; | |
14521 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
14522 | ||
;; SSE4A store of a MODEF register to memory, wrapped in UNSPEC_MOVNT;
;; emits movnt<ssemodesuffix>.
3da2a73c | 14523 | (define_insn "sse4a_movnt<mode>" |
14524 | [(set (match_operand:MODEF 0 "memory_operand" "=m") | |
14525 | (unspec:MODEF | |
14526 | [(match_operand:MODEF 1 "register_operand" "x")] | |
5deb404d | 14527 | UNSPEC_MOVNT))] |
3d775f8e | 14528 | "TARGET_SSE4A" |
63d5e521 | 14529 | "movnt<ssemodesuffix>\t{%1, %0|%0, %1}" |
3d775f8e | 14530 | [(set_attr "type" "ssemov") |
3da2a73c | 14531 | (set_attr "mode" "<MODE>")]) |
3d775f8e | 14532 | |
;; Variant of the SSE4A movnt store whose source is element 0 of a VF_128
;; vector register (vec_select with index 0); the stored value has the
;; vector's scalar mode.
3da2a73c | 14533 | (define_insn "sse4a_vmmovnt<mode>" |
14534 | [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m") | |
14535 | (unspec:<ssescalarmode> | |
14536 | [(vec_select:<ssescalarmode> | |
6fe5844b | 14537 | (match_operand:VF_128 1 "register_operand" "x") |
3da2a73c | 14538 | (parallel [(const_int 0)]))] |
14539 | UNSPEC_MOVNT))] | |
3d775f8e | 14540 | "TARGET_SSE4A" |
0061967e | 14541 | "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}" |
3d775f8e | 14542 | [(set_attr "type" "ssemov") |
3da2a73c | 14543 | (set_attr "mode" "<ssescalarmode>")]) |
3d775f8e | 14544 | |
;; SSE4A extrq with two immediate operands (each 0..255), modelled as
;; UNSPEC_EXTRQI on a V2DI register tied to the destination.
;; length_immediate is 2 because both immediates are encoded.
14545 | (define_insn "sse4a_extrqi" | |
14546 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
5deb404d | 14547 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
abd4f58b | 14548 | (match_operand 2 "const_0_to_255_operand") |
14549 | (match_operand 3 "const_0_to_255_operand")] | |
5deb404d | 14550 | UNSPEC_EXTRQI))] |
3d775f8e | 14551 | "TARGET_SSE4A" |
14552 | "extrq\t{%3, %2, %0|%0, %2, %3}" | |
14553 | [(set_attr "type" "sse") | |
1f346cbc | 14554 | (set_attr "prefix_data16" "1") |
00a0e418 | 14555 | (set_attr "length_immediate" "2") |
3d775f8e | 14556 | (set_attr "mode" "TI")]) |
14557 | ||
;; Register-controlled form of SSE4A extrq: operand 2 is a V16QI register
;; instead of a pair of immediates.  Modelled as UNSPEC_EXTRQ.
14558 | (define_insn "sse4a_extrq" | |
14559 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
5deb404d | 14560 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
14561 | (match_operand:V16QI 2 "register_operand" "x")] | |
14562 | UNSPEC_EXTRQ))] | |
3d775f8e | 14563 | "TARGET_SSE4A" |
14564 | "extrq\t{%2, %0|%0, %2}" | |
14565 | [(set_attr "type" "sse") | |
1f346cbc | 14566 | (set_attr "prefix_data16" "1") |
3d775f8e | 14567 | (set_attr "mode" "TI")]) |
14568 | ||
;; SSE4A insertq with two immediate operands (each 0..255), modelled as
;; UNSPEC_INSERTQI.  Operand 1 is tied to the destination and operand 2
;; is a second V2DI register; length_immediate is 2 for the two
;; immediates.
14569 | (define_insn "sse4a_insertqi" | |
14570 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
5deb404d | 14571 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
14572 | (match_operand:V2DI 2 "register_operand" "x") | |
abd4f58b | 14573 | (match_operand 3 "const_0_to_255_operand") |
14574 | (match_operand 4 "const_0_to_255_operand")] | |
5deb404d | 14575 | UNSPEC_INSERTQI))] |
3d775f8e | 14576 | "TARGET_SSE4A" |
14577 | "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" | |
14578 | [(set_attr "type" "sseins") | |
00a0e418 | 14579 | (set_attr "prefix_data16" "0") |
1f346cbc | 14580 | (set_attr "prefix_rep" "1") |
00a0e418 | 14581 | (set_attr "length_immediate" "2") |
3d775f8e | 14582 | (set_attr "mode" "TI")]) |
14583 | ||
;; Register-only form of SSE4A insertq (no immediates), modelled as
;; UNSPEC_INSERTQ with operand 1 tied to the destination.
14584 | (define_insn "sse4a_insertq" | |
14585 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
5deb404d | 14586 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
14587 | (match_operand:V2DI 2 "register_operand" "x")] | |
14588 | UNSPEC_INSERTQ))] | |
3d775f8e | 14589 | "TARGET_SSE4A" |
14590 | "insertq\t{%2, %0|%0, %2}" | |
14591 | [(set_attr "type" "sseins") | |
00a0e418 | 14592 | (set_attr "prefix_data16" "0") |
1f346cbc | 14593 | (set_attr "prefix_rep" "1") |
3d775f8e | 14594 | (set_attr "mode" "TI")]) |
2d771892 | 14595 | |
14596 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
14597 | ;; | |
14598 | ;; Intel SSE4.1 instructions | |
14599 | ;; | |
14600 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
14601 | ||
18b7eecb | 14602 | ;; Mapping of immediate bits for blend instructions |
;; Each value is the largest immediate for the mode, i.e. 2^N - 1 for a
;; vector of N elements.  It is spliced into the predicate name
;; const_0_to_<blendbits>_operand used by the blend pattern.
14603 | (define_mode_attr blendbits | |
14604 | [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) | |
14605 | ||
;; Immediate-controlled floating-point blend, expressed as a vec_merge of
;; operand 2 and operand 1 under the constant mask in operand 3 (bounded
;; per mode by <blendbits>).  Alternatives 0 and 1 are the non-AVX
;; two-operand forms (operand 1 tied to the destination); alternative 2
;; is the three-operand AVX form.
63d5e521 | 14606 | (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>" |
0a32b282 | 14607 | [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") |
6a3f5f59 | 14608 | (vec_merge:VF_128_256 |
0a32b282 | 14609 | (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") |
14610 | (match_operand:VF_128_256 1 "register_operand" "0,0,x") | |
abd4f58b | 14611 | (match_operand:SI 3 "const_0_to_<blendbits>_operand")))] |
e16e10c8 | 14612 | "TARGET_SSE4_1" |
f6c74054 | 14613 | "@ |
0a32b282 | 14614 | blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
f6c74054 | 14615 | blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
14616 | vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
0a32b282 | 14617 | [(set_attr "isa" "noavx,noavx,avx") |
f6c74054 | 14618 | (set_attr "type" "ssemov") |
00a0e418 | 14619 | (set_attr "length_immediate" "1") |
0a32b282 | 14620 | (set_attr "prefix_data16" "1,1,*") |
f6c74054 | 14621 | (set_attr "prefix_extra" "1") |
0a32b282 | 14622 | (set_attr "prefix" "orig,orig,vex") |
3da2a73c | 14623 | (set_attr "mode" "<MODE>")]) |
2d771892 | 14624 | |
;; Variable floating-point blend, modelled as UNSPEC_BLENDV with the
;; selector in register operand 3.  In the two non-AVX alternatives the
;; selector is constrained to "Yz" and operand 1 is tied to the
;; destination; the AVX alternative accepts any "x" register for it.
;; NOTE(review): length_immediate is "1" for all three alternatives here,
;; whereas the otherwise parallel pblendvb pattern uses "*,*,1" -- confirm
;; whether the non-AVX alternatives should also be "*".
63d5e521 | 14625 | (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>" |
0a32b282 | 14626 | [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") |
6a3f5f59 | 14627 | (unspec:VF_128_256 |
0a32b282 | 14628 | [(match_operand:VF_128_256 1 "register_operand" "0,0,x") |
14629 | (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
14630 | (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")] | |
3da2a73c | 14631 | UNSPEC_BLENDV))] |
2d771892 | 14632 | "TARGET_SSE4_1" |
f6c74054 | 14633 | "@ |
0a32b282 | 14634 | blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
f6c74054 | 14635 | blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
14636 | vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
0a32b282 | 14637 | [(set_attr "isa" "noavx,noavx,avx") |
f6c74054 | 14638 | (set_attr "type" "ssemov") |
14639 | (set_attr "length_immediate" "1") | |
0a32b282 | 14640 | (set_attr "prefix_data16" "1,1,*") |
2d771892 | 14641 | (set_attr "prefix_extra" "1") |
0a32b282 | 14642 | (set_attr "prefix" "orig,orig,vex") |
14643 | (set_attr "btver2_decode" "vector,vector,vector") | |
3da2a73c | 14644 | (set_attr "mode" "<MODE>")]) |
2d771892 | 14645 | |
;; dpps/dppd and their AVX forms, modelled as UNSPEC_DP with an 8-bit
;; immediate in operand 3.  Operand 1 is marked commutative ("%") in the
;; first alternative.  Alternatives 0 and 1 are non-AVX two-operand
;; forms; alternative 2 is the three-operand AVX form.
63d5e521 | 14646 | (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>" |
0a32b282 | 14647 | [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") |
6a3f5f59 | 14648 | (unspec:VF_128_256 |
0a32b282 | 14649 | [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x") |
14650 | (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
14651 | (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] | |
3da2a73c | 14652 | UNSPEC_DP))] |
2d771892 | 14653 | "TARGET_SSE4_1" |
f6c74054 | 14654 | "@ |
0a32b282 | 14655 | dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
f6c74054 | 14656 | dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
14657 | vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
0a32b282 | 14658 | [(set_attr "isa" "noavx,noavx,avx") |
f6c74054 | 14659 | (set_attr "type" "ssemul") |
00a0e418 | 14660 | (set_attr "length_immediate" "1") |
0a32b282 | 14661 | (set_attr "prefix_data16" "1,1,*") |
f6c74054 | 14662 | (set_attr "prefix_extra" "1") |
0a32b282 | 14663 | (set_attr "prefix" "orig,orig,vex") |
14664 | (set_attr "btver2_decode" "vector,vector,vector") | |
4c9faaa4 | 14665 | (set_attr "znver1_decode" "vector,vector,vector") |
3da2a73c | 14666 | (set_attr "mode" "<MODE>")]) |
2d771892 | 14667 | |
18b7eecb | 14668 | ;; Mode attribute used by `vmovntdqa' pattern |
;; Maps each 64-bit-element vector mode to the ISA name used as the
;; pattern-name prefix (sse4_1_movntdqa, avx2_movntdqa, avx512f_movntdqa).
14669 | (define_mode_attr vi8_sse4_1_avx2_avx512 | |
14670 | [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")]) | |
14671 | ||
;; movntdqa load: operand 1 must be memory, the result goes to a vector
;; register, and the access is wrapped in UNSPEC_MOVNTDQA.  The third
;; alternative uses the "v" register class with an "evex" prefix.
14672 | (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa" | |
0a32b282 | 14673 | [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v") |
14674 | (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")] | |
2d771892 | 14675 | UNSPEC_MOVNTDQA))] |
14676 | "TARGET_SSE4_1" | |
ed30e0a6 | 14677 | "%vmovntdqa\t{%1, %0|%0, %1}" |
fbfe006e | 14678 | [(set_attr "type" "ssemov") |
0a32b282 | 14679 | (set_attr "prefix_extra" "1,1,*") |
14680 | (set_attr "prefix" "maybe_vex,maybe_vex,evex") | |
5deb404d | 14681 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 14682 | |
;; mpsadbw / vmpsadbw on byte vectors, modelled as UNSPEC_MPSADBW with an
;; 8-bit immediate in operand 3.  Alternatives 0 and 1 are non-AVX
;; (operand 1 tied to the destination); alternative 2 is the AVX form.
5deb404d | 14683 | (define_insn "<sse4_1_avx2>_mpsadbw" |
0a32b282 | 14684 | [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x") |
fd65bafc | 14685 | (unspec:VI1_AVX2 |
0a32b282 | 14686 | [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x") |
14687 | (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
14688 | (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] | |
fd65bafc | 14689 | UNSPEC_MPSADBW))] |
2d771892 | 14690 | "TARGET_SSE4_1" |
f6c74054 | 14691 | "@ |
0a32b282 | 14692 | mpsadbw\t{%3, %2, %0|%0, %2, %3} |
f6c74054 | 14693 | mpsadbw\t{%3, %2, %0|%0, %2, %3} |
14694 | vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
0a32b282 | 14695 | [(set_attr "isa" "noavx,noavx,avx") |
f6c74054 | 14696 | (set_attr "type" "sselog1") |
00a0e418 | 14697 | (set_attr "length_immediate" "1") |
00a0e418 | 14698 | (set_attr "prefix_extra" "1") |
0a32b282 | 14699 | (set_attr "prefix" "orig,orig,vex") |
14700 | (set_attr "btver2_decode" "vector,vector,vector") | |
4c9faaa4 | 14701 | (set_attr "znver1_decode" "vector,vector,vector") |
5deb404d | 14702 | (set_attr "mode" "<sseinsnmode>")]) |
14703 | ||
;; packusdw: the result is the concatenation of the unsigned-saturating
;; truncations (us_truncate) of operands 1 and 2.  The <mask_name>
;; subst adds a masked variant; only the AVX alternative prints
;; <mask_operand3>.
2d71b728 | 14704 | (define_insn "<sse4_1_avx2>_packusdw<mask_name>" |
0a32b282 | 14705 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v") |
2d71b728 | 14706 | (vec_concat:VI2_AVX2 |
14707 | (us_truncate:<ssehalfvecmode> | |
0a32b282 | 14708 | (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v")) |
2d71b728 | 14709 | (us_truncate:<ssehalfvecmode> |
0a32b282 | 14710 | (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))] |
2d71b728 | 14711 | "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
f6c74054 | 14712 | "@ |
0a32b282 | 14713 | packusdw\t{%2, %0|%0, %2} |
f6c74054 | 14714 | packusdw\t{%2, %0|%0, %2} |
2d71b728 | 14715 | vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
0a32b282 | 14716 | [(set_attr "isa" "noavx,noavx,avx") |
f6c74054 | 14717 | (set_attr "type" "sselog") |
00a0e418 | 14718 | (set_attr "prefix_extra" "1") |
0a32b282 | 14719 | (set_attr "prefix" "orig,orig,maybe_evex") |
2d71b728 | 14720 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 14721 | |
;; Variable byte blend, modelled as UNSPEC_BLENDV with the selector in
;; register operand 3 ("Yz" in the non-AVX alternatives, any "x"
;; register in the AVX one).  Only the AVX alternative is given a
;; length_immediate of 1.
5deb404d | 14722 | (define_insn "<sse4_1_avx2>_pblendvb" |
0a32b282 | 14723 | [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x") |
5deb404d | 14724 | (unspec:VI1_AVX2 |
0a32b282 | 14725 | [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x") |
14726 | (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
14727 | (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")] | |
f6c74054 | 14728 | UNSPEC_BLENDV))] |
2d771892 | 14729 | "TARGET_SSE4_1" |
f6c74054 | 14730 | "@ |
0a32b282 | 14731 | pblendvb\t{%3, %2, %0|%0, %2, %3} |
f6c74054 | 14732 | pblendvb\t{%3, %2, %0|%0, %2, %3} |
14733 | vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
0a32b282 | 14734 | [(set_attr "isa" "noavx,noavx,avx") |
f6c74054 | 14735 | (set_attr "type" "ssemov") |
00a0e418 | 14736 | (set_attr "prefix_extra" "1") |
0a32b282 | 14737 | (set_attr "length_immediate" "*,*,1") |
14738 | (set_attr "prefix" "orig,orig,vex") | |
14739 | (set_attr "btver2_decode" "vector,vector,vector") | |
5deb404d | 14740 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 14741 | |
;; 128-bit word blend: vec_merge of operand 2 and operand 1 on V8HI under
;; an 8-bit constant mask (operand 3).  Alternatives 0 and 1 are non-AVX
;; two-operand forms; alternative 2 is the three-operand AVX form.
738630ee | 14742 | (define_insn "sse4_1_pblendw" |
0a32b282 | 14743 | [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x") |
738630ee | 14744 | (vec_merge:V8HI |
0a32b282 | 14745 | (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm") |
14746 | (match_operand:V8HI 1 "register_operand" "0,0,x") | |
14747 | (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))] | |
2d771892 | 14748 | "TARGET_SSE4_1" |
f6c74054 | 14749 | "@ |
0a32b282 | 14750 | pblendw\t{%3, %2, %0|%0, %2, %3} |
f6c74054 | 14751 | pblendw\t{%3, %2, %0|%0, %2, %3} |
14752 | vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
0a32b282 | 14753 | [(set_attr "isa" "noavx,noavx,avx") |
f6c74054 | 14754 | (set_attr "type" "ssemov") |
2d771892 | 14755 | (set_attr "prefix_extra" "1") |
00a0e418 | 14756 | (set_attr "length_immediate" "1") |
0a32b282 | 14757 | (set_attr "prefix" "orig,orig,vex") |
738630ee | 14758 | (set_attr "mode" "TI")]) |
14759 | ||
14760 | ;; The builtin uses an 8-bit immediate. Expand that. | |
;; The low 8 bits of operand 3 are duplicated into bits 8-15, giving a
;; 16-bit mask for the 16-element V16HI vec_merge that the insn pattern
;; below this expander matches.
14761 | (define_expand "avx2_pblendw" | |
abd4f58b | 14762 | [(set (match_operand:V16HI 0 "register_operand") |
738630ee | 14763 | (vec_merge:V16HI |
abd4f58b | 14764 | (match_operand:V16HI 2 "nonimmediate_operand") |
14765 | (match_operand:V16HI 1 "register_operand") | |
14766 | (match_operand:SI 3 "const_0_to_255_operand")))] | |
738630ee | 14767 | "TARGET_AVX2" |
14768 | { | |
14769 | HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff; | |
14770 | operands[3] = GEN_INT (val << 8 | val); | |
14771 | }) | |
14772 | ||
;; 256-bit word blend insn.  Operand 3 must satisfy avx2_pblendw_operand;
;; only its low 8 bits are printed as the instruction's immediate.
14773 | (define_insn "*avx2_pblendw" | |
14774 | [(set (match_operand:V16HI 0 "register_operand" "=x") | |
14775 | (vec_merge:V16HI | |
14776 | (match_operand:V16HI 2 "nonimmediate_operand" "xm") | |
14777 | (match_operand:V16HI 1 "register_operand" "x") | |
14778 | (match_operand:SI 3 "avx2_pblendw_operand" "n")))] | |
c450bad4 | 14779 | "TARGET_AVX2" |
738630ee | 14780 | { |
/* Reduce the mask to the 8 bits that are emitted as the immediate.  */
14781 | operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff); | |
14782 | return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | |
14783 | } | |
14784 | [(set_attr "type" "ssemov") | |
14785 | (set_attr "prefix_extra" "1") | |
14786 | (set_attr "length_immediate" "1") | |
14787 | (set_attr "prefix" "vex") | |
14788 | (set_attr "mode" "OI")]) | |
5deb404d | 14789 | |
;; AVX2 doubleword blend: vec_merge of operand 2 and operand 1 on the
;; VI4_AVX2 modes under an 8-bit constant mask; always the three-operand
;; VEX form.
14790 | (define_insn "avx2_pblendd<mode>" | |
14791 | [(set (match_operand:VI4_AVX2 0 "register_operand" "=x") | |
14792 | (vec_merge:VI4_AVX2 | |
14793 | (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm") | |
14794 | (match_operand:VI4_AVX2 1 "register_operand" "x") | |
14795 | (match_operand:SI 3 "const_0_to_255_operand" "n")))] | |
14796 | "TARGET_AVX2" | |
14797 | "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
14798 | [(set_attr "type" "ssemov") | |
14799 | (set_attr "prefix_extra" "1") | |
14800 | (set_attr "length_immediate" "1") | |
14801 | (set_attr "prefix" "vex") | |
14802 | (set_attr "mode" "<sseinsnmode>")]) | |
2d771892 | 14803 | |
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW with a single
;; register-or-memory source operand.
14804 | (define_insn "sse4_1_phminposuw" | |
0a32b282 | 14805 | [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x") |
14806 | (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")] | |
2d771892 | 14807 | UNSPEC_PHMINPOSUW))] |
14808 | "TARGET_SSE4_1" | |
ed30e0a6 | 14809 | "%vphminposuw\t{%1, %0|%0, %1}" |
2d771892 | 14810 | [(set_attr "type" "sselog1") |
14811 | (set_attr "prefix_extra" "1") | |
ed30e0a6 | 14812 | (set_attr "prefix" "maybe_vex") |
2d771892 | 14813 | (set_attr "mode" "TI")]) |
14814 | ||
;; Sign/zero extension (any_extend) of a whole V16QI operand to V16HI;
;; emits vpmov<extsuffix>bw.  The <mask_name> subst adds a masked variant
;; gated by the AVX512BW and AVX512VL mask conditions.
ffd21b9d | 14815 | (define_insn "avx2_<code>v16qiv16hi2<mask_name>" |
14816 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
5deb404d | 14817 | (any_extend:V16HI |
ffd21b9d | 14818 | (match_operand:V16QI 1 "nonimmediate_operand" "vm")))] |
14819 | "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" | |
14820 | "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
5deb404d | 14821 | [(set_attr "type" "ssemov") |
14822 | (set_attr "prefix_extra" "1") | |
ffd21b9d | 14823 | (set_attr "prefix" "maybe_evex") |
5deb404d | 14824 | (set_attr "mode" "OI")]) |
14825 | ||
;; AVX512BW sign/zero extension of a whole V32QI operand to V32HI; emits
;; vpmov<extsuffix>bw with an optional mask operand.
ffd21b9d | 14826 | (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>" |
14827 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
14828 | (any_extend:V32HI | |
14829 | (match_operand:V32QI 1 "nonimmediate_operand" "vm")))] | |
14830 | "TARGET_AVX512BW" | |
14831 | "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
14832 | [(set_attr "type" "ssemov") | |
14833 | (set_attr "prefix_extra" "1") | |
14834 | (set_attr "prefix" "evex") | |
14835 | (set_attr "mode" "XI")]) | |
14836 | ||
;; Sign/zero extension of elements 0-7 of a V16QI operand to V8HI.  Only
;; 8 bytes of the source are selected, which is why the Intel-syntax
;; side prints operand 1 with the %q modifier and ssememalign is 64.
14837 | (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>" | |
0a32b282 | 14838 | [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v") |
c868bf35 | 14839 | (any_extend:V8HI |
2d771892 | 14840 | (vec_select:V8QI |
0a32b282 | 14841 | (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") |
04d95c72 | 14842 | (parallel [(const_int 0) (const_int 1) |
14843 | (const_int 2) (const_int 3) | |
14844 | (const_int 4) (const_int 5) | |
14845 | (const_int 6) (const_int 7)]))))] | |
ffd21b9d | 14846 | "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" |
14847 | "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
2d771892 | 14848 | [(set_attr "type" "ssemov") |
8c1dfa94 | 14849 | (set_attr "ssememalign" "64") |
2d771892 | 14850 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 14851 | (set_attr "prefix" "maybe_vex") |
2d771892 | 14852 | (set_attr "mode" "TI")]) |
14853 | ||
;; AVX512F sign/zero extension of all sixteen QImode elements of a V16QI
;; operand to V16SI; emits vpmov<extsuffix>bd with an optional mask.
;; Operand 1 is consumed in full (there is no vec_select), so the
;; Intel-syntax side prints it with plain %1, as the other whole-operand
;; extension patterns do (e.g. avx512f_<code>v16hiv16si2).  A %q1 size
;; override would describe a memory source as only 64 bits wide.
5220cab6 | 14854 | (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>" |
697a43f8 | 14855 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
14856 | (any_extend:V16SI | |
14857 | (match_operand:V16QI 1 "nonimmediate_operand" "vm")))] | |
14858 | "TARGET_AVX512F" | |
5220cab6 | 14859 | "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
697a43f8 | 14860 | [(set_attr "type" "ssemov") |
14861 | (set_attr "prefix" "evex") | |
14862 | (set_attr "mode" "XI")]) | |
14863 | ||
;; Sign/zero extension of elements 0-7 of a V16QI operand to V8SI; emits
;; vpmov<extsuffix>bd.  Eight source bytes are selected, so the
;; Intel-syntax side prints operand 1 with the %q modifier.
ffd21b9d | 14864 | (define_insn "avx2_<code>v8qiv8si2<mask_name>" |
14865 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
5deb404d | 14866 | (any_extend:V8SI |
14867 | (vec_select:V8QI | |
ffd21b9d | 14868 | (match_operand:V16QI 1 "nonimmediate_operand" "vm") |
04d95c72 | 14869 | (parallel [(const_int 0) (const_int 1) |
14870 | (const_int 2) (const_int 3) | |
14871 | (const_int 4) (const_int 5) | |
14872 | (const_int 6) (const_int 7)]))))] | |
ffd21b9d | 14873 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
14874 | "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
5deb404d | 14875 | [(set_attr "type" "ssemov") |
14876 | (set_attr "prefix_extra" "1") | |
ffd21b9d | 14877 | (set_attr "prefix" "maybe_evex") |
5deb404d | 14878 | (set_attr "mode" "OI")]) |
14879 | ||
;; Sign/zero extension of elements 0-3 of a V16QI operand to V4SI; emits
;; pmov<extsuffix>bd.  Four source bytes are selected: the Intel-syntax
;; side uses the %k modifier and ssememalign is 32.
ffd21b9d | 14880 | (define_insn "sse4_1_<code>v4qiv4si2<mask_name>" |
0a32b282 | 14881 | [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v") |
c868bf35 | 14882 | (any_extend:V4SI |
2d771892 | 14883 | (vec_select:V4QI |
0a32b282 | 14884 | (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") |
04d95c72 | 14885 | (parallel [(const_int 0) (const_int 1) |
14886 | (const_int 2) (const_int 3)]))))] | |
ffd21b9d | 14887 | "TARGET_SSE4_1 && <mask_avx512vl_condition>" |
14888 | "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" | |
2d771892 | 14889 | [(set_attr "type" "ssemov") |
8c1dfa94 | 14890 | (set_attr "ssememalign" "32") |
2d771892 | 14891 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 14892 | (set_attr "prefix" "maybe_vex") |
2d771892 | 14893 | (set_attr "mode" "TI")]) |
14894 | ||
;; AVX512F sign/zero extension of a whole V16HI operand to V16SI; emits
;; vpmov<extsuffix>wd with an optional mask operand.
5220cab6 | 14895 | (define_insn "avx512f_<code>v16hiv16si2<mask_name>" |
697a43f8 | 14896 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
14897 | (any_extend:V16SI | |
14898 | (match_operand:V16HI 1 "nonimmediate_operand" "vm")))] | |
14899 | "TARGET_AVX512F" | |
5220cab6 | 14900 | "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
697a43f8 | 14901 | [(set_attr "type" "ssemov") |
14902 | (set_attr "prefix" "evex") | |
14903 | (set_attr "mode" "XI")]) | |
14904 | ||
;; AVX2 sign/zero extension of a whole V8HI operand to V8SI; emits
;; vpmov<extsuffix>wd.  The masked variant requires the AVX512VL mask
;; condition.
ffd21b9d | 14905 | (define_insn "avx2_<code>v8hiv8si2<mask_name>" |
14906 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
5deb404d | 14907 | (any_extend:V8SI |
ffd21b9d | 14908 | (match_operand:V8HI 1 "nonimmediate_operand" "vm")))] |
14909 | "TARGET_AVX2 && <mask_avx512vl_condition>" | |
14910 | "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
5deb404d | 14911 | [(set_attr "type" "ssemov") |
14912 | (set_attr "prefix_extra" "1") | |
ffd21b9d | 14913 | (set_attr "prefix" "maybe_evex") |
5deb404d | 14914 | (set_attr "mode" "OI")]) |
14915 | ||
;; Sign/zero extension of elements 0-3 of a V8HI operand to V4SI; emits
;; pmov<extsuffix>wd.  Four HImode elements (8 bytes) are selected: the
;; Intel-syntax side uses the %q modifier and ssememalign is 64.
ffd21b9d | 14916 | (define_insn "sse4_1_<code>v4hiv4si2<mask_name>" |
0a32b282 | 14917 | [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v") |
c868bf35 | 14918 | (any_extend:V4SI |
2d771892 | 14919 | (vec_select:V4HI |
0a32b282 | 14920 | (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm") |
04d95c72 | 14921 | (parallel [(const_int 0) (const_int 1) |
14922 | (const_int 2) (const_int 3)]))))] | |
ffd21b9d | 14923 | "TARGET_SSE4_1 && <mask_avx512vl_condition>" |
14924 | "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
2d771892 | 14925 | [(set_attr "type" "ssemov") |
8c1dfa94 | 14926 | (set_attr "ssememalign" "64") |
2d771892 | 14927 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 14928 | (set_attr "prefix" "maybe_vex") |
2d771892 | 14929 | (set_attr "mode" "TI")]) |
14930 | ||
;; AVX512F sign/zero extension of elements 0-7 of a V16QI operand to
;; V8DI; emits vpmov<extsuffix>bq with an optional mask.
;; Eight QImode elements (64 bits) of operand 1 are selected, so the
;; Intel-syntax side prints it with the 64-bit %q modifier, matching
;; avx2_<code>v8qiv8si2, which selects the same eight bytes.  The 32-bit
;; %k modifier would understate the size of a memory source.
5220cab6 | 14931 | (define_insn "avx512f_<code>v8qiv8di2<mask_name>" |
697a43f8 | 14932 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
14933 | (any_extend:V8DI | |
14934 | (vec_select:V8QI | |
14935 | (match_operand:V16QI 1 "nonimmediate_operand" "vm") | |
14936 | (parallel [(const_int 0) (const_int 1) | |
14937 | (const_int 2) (const_int 3) | |
14938 | (const_int 4) (const_int 5) | |
14939 | (const_int 6) (const_int 7)]))))] | |
14940 | "TARGET_AVX512F" | |
5220cab6 | 14941 | "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" |
697a43f8 | 14942 | [(set_attr "type" "ssemov") |
14943 | (set_attr "prefix" "evex") | |
14944 | (set_attr "mode" "XI")]) | |
14945 | ||
;; Sign/zero extension of elements 0-3 of a V16QI operand to V4DI; emits
;; vpmov<extsuffix>bq.  Four source bytes are selected, so the
;; Intel-syntax side prints operand 1 with the %k modifier.
ffd21b9d | 14946 | (define_insn "avx2_<code>v4qiv4di2<mask_name>" |
14947 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
5deb404d | 14948 | (any_extend:V4DI |
14949 | (vec_select:V4QI | |
ffd21b9d | 14950 | (match_operand:V16QI 1 "nonimmediate_operand" "vm") |
04d95c72 | 14951 | (parallel [(const_int 0) (const_int 1) |
14952 | (const_int 2) (const_int 3)]))))] | |
ffd21b9d | 14953 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
14954 | "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" | |
5deb404d | 14955 | [(set_attr "type" "ssemov") |
14956 | (set_attr "prefix_extra" "1") | |
ffd21b9d | 14957 | (set_attr "prefix" "maybe_evex") |
5deb404d | 14958 | (set_attr "mode" "OI")]) |
14959 | ||
;; Sign/zero extension of elements 0-1 of a V16QI operand to V2DI; emits
;; pmov<extsuffix>bq.  Two source bytes are selected: the Intel-syntax
;; side uses the %w modifier and ssememalign is 16.
ffd21b9d | 14960 | (define_insn "sse4_1_<code>v2qiv2di2<mask_name>" |
0a32b282 | 14961 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") |
c868bf35 | 14962 | (any_extend:V2DI |
2d771892 | 14963 | (vec_select:V2QI |
0a32b282 | 14964 | (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") |
04d95c72 | 14965 | (parallel [(const_int 0) (const_int 1)]))))] |
ffd21b9d | 14966 | "TARGET_SSE4_1 && <mask_avx512vl_condition>" |
14967 | "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}" | |
2d771892 | 14968 | [(set_attr "type" "ssemov") |
8c1dfa94 | 14969 | (set_attr "ssememalign" "16") |
2d771892 | 14970 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 14971 | (set_attr "prefix" "maybe_vex") |
2d771892 | 14972 | (set_attr "mode" "TI")]) |
14973 | ||
;; AVX512F sign/zero extension of all eight HImode elements of a V8HI
;; operand to V8DI; emits vpmov<extsuffix>wq with an optional mask.
;; Operand 1 is consumed in full (there is no vec_select), so the
;; Intel-syntax side prints it with plain %1, as the other whole-operand
;; extension patterns do (e.g. avx512f_<code>v8siv8di2).  A %q1 size
;; override would describe a memory source as only 64 bits wide.
5220cab6 | 14974 | (define_insn "avx512f_<code>v8hiv8di2<mask_name>" |
697a43f8 | 14975 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
14976 | (any_extend:V8DI | |
14977 | (match_operand:V8HI 1 "nonimmediate_operand" "vm")))] | |
14978 | "TARGET_AVX512F" | |
5220cab6 | 14979 | "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
697a43f8 | 14980 | [(set_attr "type" "ssemov") |
14981 | (set_attr "prefix" "evex") | |
14982 | (set_attr "mode" "XI")]) | |
14983 | ||
;; Sign/zero extension of elements 0-3 of a V8HI operand to V4DI; emits
;; vpmov<extsuffix>wq.  Four HImode elements (8 bytes) are selected, so
;; the Intel-syntax side prints operand 1 with the %q modifier.
ffd21b9d | 14984 | (define_insn "avx2_<code>v4hiv4di2<mask_name>" |
14985 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
5deb404d | 14986 | (any_extend:V4DI |
14987 | (vec_select:V4HI | |
ffd21b9d | 14988 | (match_operand:V8HI 1 "nonimmediate_operand" "vm") |
04d95c72 | 14989 | (parallel [(const_int 0) (const_int 1) |
14990 | (const_int 2) (const_int 3)]))))] | |
ffd21b9d | 14991 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
14992 | "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
5deb404d | 14993 | [(set_attr "type" "ssemov") |
14994 | (set_attr "prefix_extra" "1") | |
ffd21b9d | 14995 | (set_attr "prefix" "maybe_evex") |
5deb404d | 14996 | (set_attr "mode" "OI")]) |
14997 | ||
;; Sign/zero extension of elements 0-1 of a V8HI operand to V2DI; emits
;; pmov<extsuffix>wq.  Two HImode elements (4 bytes) are selected: the
;; Intel-syntax side uses the %k modifier and ssememalign is 32.
ffd21b9d | 14998 | (define_insn "sse4_1_<code>v2hiv2di2<mask_name>" |
0a32b282 | 14999 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") |
c868bf35 | 15000 | (any_extend:V2DI |
2d771892 | 15001 | (vec_select:V2HI |
0a32b282 | 15002 | (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm") |
04d95c72 | 15003 | (parallel [(const_int 0) (const_int 1)]))))] |
ffd21b9d | 15004 | "TARGET_SSE4_1 && <mask_avx512vl_condition>" |
15005 | "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" | |
2d771892 | 15006 | [(set_attr "type" "ssemov") |
8c1dfa94 | 15007 | (set_attr "ssememalign" "32") |
2d771892 | 15008 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 15009 | (set_attr "prefix" "maybe_vex") |
2d771892 | 15010 | (set_attr "mode" "TI")]) |
15011 | ||
;; AVX512F sign/zero extension of a whole V8SI operand to V8DI; emits
;; vpmov<extsuffix>dq with an optional mask operand.
5220cab6 | 15012 | (define_insn "avx512f_<code>v8siv8di2<mask_name>" |
697a43f8 | 15013 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
15014 | (any_extend:V8DI | |
15015 | (match_operand:V8SI 1 "nonimmediate_operand" "vm")))] | |
15016 | "TARGET_AVX512F" | |
5220cab6 | 15017 | "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
697a43f8 | 15018 | [(set_attr "type" "ssemov") |
15019 | (set_attr "prefix" "evex") | |
15020 | (set_attr "mode" "XI")]) | |
15021 | ||
;; AVX2 sign/zero extension of a whole V4SI operand to V4DI; emits
;; vpmov<extsuffix>dq.  The masked variant requires the AVX512VL mask
;; condition.
ffd21b9d | 15022 | (define_insn "avx2_<code>v4siv4di2<mask_name>" |
15023 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
5deb404d | 15024 | (any_extend:V4DI |
ffd21b9d | 15025 | (match_operand:V4SI 1 "nonimmediate_operand" "vm")))] |
15026 | "TARGET_AVX2 && <mask_avx512vl_condition>" | |
15027 | "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
5deb404d | 15028 | [(set_attr "type" "ssemov") |
ffd21b9d | 15029 | (set_attr "prefix" "maybe_evex") |
5deb404d | 15030 | (set_attr "prefix_extra" "1") |
15031 | (set_attr "mode" "OI")]) | |
15032 | ||
;; Sign/zero extension of elements 0-1 of a V4SI operand to V2DI; emits
;; pmov<extsuffix>dq.  Two SImode elements (8 bytes) are selected: the
;; Intel-syntax side uses the %q modifier and ssememalign is 64.
ffd21b9d | 15033 | (define_insn "sse4_1_<code>v2siv2di2<mask_name>" |
0a32b282 | 15034 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") |
c868bf35 | 15035 | (any_extend:V2DI |
2d771892 | 15036 | (vec_select:V2SI |
0a32b282 | 15037 | (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm") |
04d95c72 | 15038 | (parallel [(const_int 0) (const_int 1)]))))] |
ffd21b9d | 15039 | "TARGET_SSE4_1 && <mask_avx512vl_condition>" |
15040 | "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
2d771892 | 15041 | [(set_attr "type" "ssemov") |
8c1dfa94 | 15042 | (set_attr "ssememalign" "64") |
2d771892 | 15043 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 15044 | (set_attr "prefix" "maybe_vex") |
2d771892 | 15045 | (set_attr "mode" "TI")]) |
15046 | ||
ed30e0a6 | 15047 | ;; vtestps/vtestpd are very similar to comiss and ucomiss when |
15048 | ;; setting FLAGS_REG. But they are not really compare instructions. | |
;; AVX vtest<ssemodesuffix>: sets FLAGS_REG (in CCmode) from an
;; UNSPEC_VTESTP of a register operand and a register-or-memory operand.
;; The insn has no other output.
63d5e521 | 15049 | (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>" |
ed30e0a6 | 15050 | [(set (reg:CC FLAGS_REG) |
6a3f5f59 | 15051 | (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x") |
15052 | (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")] | |
ed30e0a6 | 15053 | UNSPEC_VTESTP))] |
15054 | "TARGET_AVX" | |
0061967e | 15055 | "vtest<ssemodesuffix>\t{%1, %0|%0, %1}" |
ed30e0a6 | 15056 | [(set_attr "type" "ssecomi") |
00a0e418 | 15057 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 15058 | (set_attr "prefix" "vex") |
15059 | (set_attr "mode" "<MODE>")]) | |
15060 | ||
2d771892 | 15061 | ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG. |
15062 | ;; But it is not really a compare instruction. | |
;; The pattern sets FLAGS_REG (mode CC) from an UNSPEC_PTEST of two
;; V_AVX operands.  There are three alternatives; the third is tagged
;; with isa "avx".  The btver2_decode attribute is "vector" when
;; <sseinsnmode> is OImode and "*" otherwise.
37407f90 | 15063 | (define_insn "<sse4_1>_ptest<mode>" |
2d771892 | 15064 | [(set (reg:CC FLAGS_REG) |
37407f90 | 15065 | (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x") |
15066 | (match_operand:V_AVX 1 "nonimmediate_operand" "Yrm, *xm, xm")] | |
2d771892 | 15067 | UNSPEC_PTEST))] |
15068 | "TARGET_SSE4_1" | |
ed30e0a6 | 15069 | "%vptest\t{%1, %0|%0, %1}" |
37407f90 | 15070 | [(set_attr "isa" "*,*,avx") |
15071 | (set_attr "type" "ssecomi") | |
2d771892 | 15072 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 15073 | (set_attr "prefix" "maybe_vex") |
37407f90 | 15074 | (set (attr "btver2_decode") |
15075 | (if_then_else | |
15076 | (match_test "<sseinsnmode>mode==OImode") | |
15077 | (const_string "vector") | |
15078 | (const_string "*"))) | |
15079 | (set_attr "mode" "<sseinsnmode>")]) | |
2d771892 | 15080 | |
;; Packed rounding of a VF_128_256 vector, modelled as UNSPEC_ROUND.
;; Operand 1 is the source (register or memory) and operand 2 is an
;; immediate accepted by const_0_to_15_operand that is passed straight
;; through to the instruction.  The prefix_data16 attribute is "*" when
;; TARGET_AVX is set and "1" otherwise.
63d5e521 | 15081 | (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>" |
0a32b282 | 15082 | [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x") |
6a3f5f59 | 15083 | (unspec:VF_128_256 |
0a32b282 | 15084 | [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm") |
15085 | (match_operand:SI 2 "const_0_to_15_operand" "n,n")] | |
3da2a73c | 15086 | UNSPEC_ROUND))] |
448e99f5 | 15087 | "TARGET_ROUND" |
0061967e | 15088 | "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
2d771892 | 15089 | [(set_attr "type" "ssecvt") |
f6c74054 | 15090 | (set (attr "prefix_data16") |
15091 | (if_then_else | |
6be3efec | 15092 | (match_test "TARGET_AVX") |
f6c74054 | 15093 | (const_string "*") |
15094 | (const_string "1"))) | |
2d771892 | 15095 | (set_attr "prefix_extra" "1") |
00a0e418 | 15096 | (set_attr "length_immediate" "1") |
ed30e0a6 | 15097 | (set_attr "prefix" "maybe_vex") |
15098 | (set_attr "mode" "<MODE>")]) | |
15099 | ||
;; Round-then-convert expander.  Operand 1 (VF1_128_256) is rounded
;; with the immediate in operand 2 into a fresh pseudo of the same
;; mode, and that pseudo is then converted with fix_trunc into the
;; integer vector operand 0 (<sseintvecmode>).  The expander emits
;; both insns itself and finishes with DONE.
c34303ca | 15100 | (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>" |
abd4f58b | 15101 | [(match_operand:<sseintvecmode> 0 "register_operand") |
03ae25dc | 15102 | (match_operand:VF1_128_256 1 "nonimmediate_operand") |
abd4f58b | 15103 | (match_operand:SI 2 "const_0_to_15_operand")] |
c34303ca | 15104 | "TARGET_ROUND" |
15105 | { | |
15106 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
15107 | ||
15108 | emit_insn | |
15109 | (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1], | |
15110 | operands[2])); | |
15111 | emit_insn | |
15112 | (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); | |
15113 | DONE; | |
15114 | }) | |
15115 | ||
;; V8DF rounding entry point for TARGET_AVX512F.  It takes the same
;; three operands as the narrower round patterns (destination, source,
;; 0..15 immediate) and forwards them unchanged to
;; gen_avx512f_rndscalev8df.
6615b722 | 15116 | (define_expand "avx512f_roundpd512" |
15117 | [(match_operand:V8DF 0 "register_operand") | |
15118 | (match_operand:V8DF 1 "nonimmediate_operand") | |
15119 | (match_operand:SI 2 "const_0_to_15_operand")] | |
15120 | "TARGET_AVX512F" | |
15121 | { | |
15122 | emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2])); | |
15123 | DONE; | |
15124 | }) | |
15125 | ||
;; Round two VF2 vectors (operands 1 and 2) with the immediate in
;; operand 3 and pack the converted results into operand 0
;; (<ssepackfltmode>).
;;
;; Two strategies are used:
;;  - V2DF inputs with TARGET_AVX, !TARGET_PREFER_AVX128 and
;;    optimize_insn_for_speed_p (): concatenate both inputs into one
;;    V4DF pseudo, round once with gen_avx_roundpd256, then convert
;;    with gen_fix_truncv4dfv4si2.
;;  - otherwise: round each input separately into its own pseudo and
;;    combine them with gen_vec_pack_sfix_trunc_<mode>.
c34303ca | 15126 | (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>" |
abd4f58b | 15127 | [(match_operand:<ssepackfltmode> 0 "register_operand") |
15128 | (match_operand:VF2 1 "nonimmediate_operand") | |
15129 | (match_operand:VF2 2 "nonimmediate_operand") | |
15130 | (match_operand:SI 3 "const_0_to_15_operand")] | |
c34303ca | 15131 | "TARGET_ROUND" |
15132 | { | |
15133 | rtx tmp0, tmp1; | |
15134 | ||
4030506f | 15135 | if (<MODE>mode == V2DFmode |
f00377d6 | 15136 | && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) |
4030506f | 15137 | { |
15138 | rtx tmp2 = gen_reg_rtx (V4DFmode); | |
c34303ca | 15139 | |
4030506f | 15140 | tmp0 = gen_reg_rtx (V4DFmode); |
15141 | tmp1 = force_reg (V2DFmode, operands[1]); | |
15142 | ||
15143 | emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); | |
15144 | emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3])); | |
15145 | emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2)); | |
15146 | } | |
15147 | else | |
15148 | { | |
15149 | tmp0 = gen_reg_rtx (<MODE>mode); | |
15150 | tmp1 = gen_reg_rtx (<MODE>mode); | |
15151 | ||
15152 | emit_insn | |
15153 | (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1], | |
15154 | operands[3])); | |
15155 | emit_insn | |
15156 | (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2], | |
15157 | operands[3])); | |
15158 | emit_insn | |
15159 | (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1)); | |
15160 | } | |
c34303ca | 15161 | DONE; |
15162 | }) | |
15163 | ||
;; Scalar rounding.  The result is a vec_merge, with mask (const_int 1),
;; of UNSPEC_ROUND applied to operand 2 (immediate in operand 3) and
;; operand 1.  In the two "noavx" alternatives operand 1 is tied to the
;; destination (constraint "0") and the two-operand round mnemonic is
;; used; the "avx" alternative keeps operand 1 separate and emits the
;; three-operand vround form.
0061967e | 15164 | (define_insn "sse4_1_round<ssescalarmodesuffix>" |
0a32b282 | 15165 | [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x") |
f6c74054 | 15166 | (vec_merge:VF_128 |
15167 | (unspec:VF_128 | |
0a32b282 | 15168 | [(match_operand:VF_128 2 "register_operand" "Yr,*x,x") |
15169 | (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")] | |
3da2a73c | 15170 | UNSPEC_ROUND) |
0a32b282 | 15171 | (match_operand:VF_128 1 "register_operand" "0,0,x") |
2d771892 | 15172 | (const_int 1)))] |
448e99f5 | 15173 | "TARGET_ROUND" |
f6c74054 | 15174 | "@ |
0a32b282 | 15175 | round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} |
f6c74054 | 15176 | round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} |
15177 | vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
0a32b282 | 15178 | [(set_attr "isa" "noavx,noavx,avx") |
f6c74054 | 15179 | (set_attr "type" "ssecvt") |
00a0e418 | 15180 | (set_attr "length_immediate" "1") |
0a32b282 | 15181 | (set_attr "prefix_data16" "1,1,*") |
f6c74054 | 15182 | (set_attr "prefix_extra" "1") |
0a32b282 | 15183 | (set_attr "prefix" "orig,orig,vex") |
3da2a73c | 15184 | (set_attr "mode" "<MODE>")]) |
f0dd3deb | 15185 | |
;; Vector round expander, enabled only when TARGET_ROUND is set and
;; flag_trapping_math is clear.
;;
;; The emitted sequence is:
;;   operands[3] = copysign (vec_half, operands[1])
;;   operands[4] = operands[1] + operands[3]
;;   operands[0] = UNSPEC_ROUND [operands[4], ROUND_TRUNC]
;; vec_half is a constant vector built from pred_half, which is
;; dconsthalf minus the value real_2expN yields for exponent
;; -(fmt->p) - 1; the in-body comment identifies it as
;; nextafter (0.5, 0.0).  Operands 3, 4 and 5 are created in the
;; preparation code and referenced through match_dup.
56b61659 | 15186 | (define_expand "round<mode>2" |
15187 | [(set (match_dup 4) | |
15188 | (plus:VF | |
abd4f58b | 15189 | (match_operand:VF 1 "register_operand") |
56b61659 | 15190 | (match_dup 3))) |
abd4f58b | 15191 | (set (match_operand:VF 0 "register_operand") |
56b61659 | 15192 | (unspec:VF |
15193 | [(match_dup 4) (match_dup 5)] | |
15194 | UNSPEC_ROUND))] | |
15195 | "TARGET_ROUND && !flag_trapping_math" | |
15196 | { | |
3754d046 | 15197 | machine_mode scalar_mode; |
56b61659 | 15198 | const struct real_format *fmt; |
15199 | REAL_VALUE_TYPE pred_half, half_minus_pred_half; | |
15200 | rtx half, vec_half; | |
15201 | ||
15202 | scalar_mode = GET_MODE_INNER (<MODE>mode); | |
15203 | ||
15204 | /* load nextafter (0.5, 0.0) */ | |
15205 | fmt = REAL_MODE_FORMAT (scalar_mode); | |
15206 | real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode); | |
f2ad9e38 | 15207 | real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); |
56b61659 | 15208 | half = const_double_from_real_value (pred_half, scalar_mode); |
15209 | ||
15210 | vec_half = ix86_build_const_vector (<MODE>mode, true, half); | |
15211 | vec_half = force_reg (<MODE>mode, vec_half); | |
15212 | ||
15213 | operands[3] = gen_reg_rtx (<MODE>mode); | |
15214 | emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1])); | |
15215 | ||
15216 | operands[4] = gen_reg_rtx (<MODE>mode); | |
15217 | operands[5] = GEN_INT (ROUND_TRUNC); | |
15218 | }) | |
15219 | ||
;; Round operand 1 (VF1_128_256) with gen_round<mode>2 into a fresh
;; pseudo, then convert that pseudo with fix_trunc into the integer
;; vector operand 0 (<sseintvecmode>).  Enabled under the same
;; condition as round<mode>2: TARGET_ROUND && !flag_trapping_math.
c34303ca | 15220 | (define_expand "round<mode>2_sfix" |
abd4f58b | 15221 | [(match_operand:<sseintvecmode> 0 "register_operand") |
03ae25dc | 15222 | (match_operand:VF1_128_256 1 "register_operand")] |
c34303ca | 15223 | "TARGET_ROUND && !flag_trapping_math" |
15224 | { | |
15225 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
15226 | ||
15227 | emit_insn (gen_round<mode>2 (tmp, operands[1])); | |
15228 | ||
15229 | emit_insn | |
15230 | (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); | |
15231 | DONE; | |
15232 | }) | |
15233 | ||
;; Round two VF2 vectors (operands 1 and 2) and pack the converted
;; results into operand 0 (<ssepackfltmode>).  Enabled when
;; TARGET_ROUND && !flag_trapping_math.
;;
;; Two strategies are used:
;;  - V2DF inputs with TARGET_AVX, !TARGET_PREFER_AVX128 and
;;    optimize_insn_for_speed_p (): concatenate both inputs into one
;;    V4DF pseudo, round once with gen_roundv4df2, then convert with
;;    gen_fix_truncv4dfv4si2.
;;  - otherwise: round each input separately with gen_round<mode>2 and
;;    combine the results with gen_vec_pack_sfix_trunc_<mode>.
15234 | (define_expand "round<mode>2_vec_pack_sfix" | |
abd4f58b | 15235 | [(match_operand:<ssepackfltmode> 0 "register_operand") |
15236 | (match_operand:VF2 1 "register_operand") | |
15237 | (match_operand:VF2 2 "register_operand")] | |
c34303ca | 15238 | "TARGET_ROUND && !flag_trapping_math" |
15239 | { | |
15240 | rtx tmp0, tmp1; | |
15241 | ||
4030506f | 15242 | if (<MODE>mode == V2DFmode |
f00377d6 | 15243 | && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) |
4030506f | 15244 | { |
15245 | rtx tmp2 = gen_reg_rtx (V4DFmode); | |
c34303ca | 15246 | |
4030506f | 15247 | tmp0 = gen_reg_rtx (V4DFmode); |
15248 | tmp1 = force_reg (V2DFmode, operands[1]); | |
c34303ca | 15249 | |
4030506f | 15250 | emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); |
15251 | emit_insn (gen_roundv4df2 (tmp2, tmp0)); | |
15252 | emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2)); | |
15253 | } | |
15254 | else | |
15255 | { | |
15256 | tmp0 = gen_reg_rtx (<MODE>mode); | |
15257 | tmp1 = gen_reg_rtx (<MODE>mode); | |
15258 | ||
15259 | emit_insn (gen_round<mode>2 (tmp0, operands[1])); | |
15260 | emit_insn (gen_round<mode>2 (tmp1, operands[2])); | |
15261 | ||
15262 | emit_insn | |
15263 | (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1)); | |
15264 | } | |
c34303ca | 15265 | DONE; |
15266 | }) | |
15267 | ||
f0dd3deb | 15268 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
15269 | ;; | |
15270 | ;; Intel SSE4.2 string/text processing instructions | |
15271 | ;; | |
15272 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
15273 | ||
;; Combined explicit-length string compare.  The pattern describes all
;; three results of UNSPEC_PCMPESTR at once: an SI value in operand 0
;; (constraint "c"), a V16QI value in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are two V16QI operands (2 and 4), their SI
;; companions (3 with constraint "a", 5 with constraint "d") and an
;; immediate (6) accepted by const_0_to_255_operand.
;;
;; It only matches while can_create_pseudo_p () holds and is always
;; split ("&& 1").  The splitter checks REG_UNUSED notes on curr_insn
;; to see which results are live and emits:
;;   - sse4_2_pcmpestri when operand 0 is used,
;;   - sse4_2_pcmpestrm when operand 1 is used,
;;   - sse4_2_pcmpestr_cconly when only the flags are used,
;;   - a NOTE_INSN_DELETED note when nothing is used.
15274 | (define_insn_and_split "sse4_2_pcmpestr" | |
15275 | [(set (match_operand:SI 0 "register_operand" "=c,c") | |
15276 | (unspec:SI | |
1a5eff3d | 15277 | [(match_operand:V16QI 2 "register_operand" "x,x") |
f0dd3deb | 15278 | (match_operand:SI 3 "register_operand" "a,a") |
1a5eff3d | 15279 | (match_operand:V16QI 4 "nonimmediate_operand" "x,m") |
f0dd3deb | 15280 | (match_operand:SI 5 "register_operand" "d,d") |
15281 | (match_operand:SI 6 "const_0_to_255_operand" "n,n")] | |
15282 | UNSPEC_PCMPESTR)) | |
50c9119e | 15283 | (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz") |
f0dd3deb | 15284 | (unspec:V16QI |
15285 | [(match_dup 2) | |
15286 | (match_dup 3) | |
15287 | (match_dup 4) | |
15288 | (match_dup 5) | |
15289 | (match_dup 6)] | |
15290 | UNSPEC_PCMPESTR)) | |
15291 | (set (reg:CC FLAGS_REG) | |
15292 | (unspec:CC | |
15293 | [(match_dup 2) | |
15294 | (match_dup 3) | |
15295 | (match_dup 4) | |
15296 | (match_dup 5) | |
15297 | (match_dup 6)] | |
15298 | UNSPEC_PCMPESTR))] | |
15299 | "TARGET_SSE4_2 | |
d0b2c064 | 15300 | && can_create_pseudo_p ()" |
f0dd3deb | 15301 | "#" |
15302 | "&& 1" | |
15303 | [(const_int 0)] | |
15304 | { | |
15305 | int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); | |
15306 | int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); | |
15307 | int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); | |
15308 | ||
15309 | if (ecx) | |
15310 | emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2], | |
15311 | operands[3], operands[4], | |
15312 | operands[5], operands[6])); | |
15313 | if (xmm0) | |
15314 | emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2], | |
15315 | operands[3], operands[4], | |
15316 | operands[5], operands[6])); | |
15317 | if (flags && !(ecx || xmm0)) | |
18f95a36 | 15318 | emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL, |
15319 | operands[2], operands[3], | |
f0dd3deb | 15320 | operands[4], operands[5], |
15321 | operands[6])); | |
1c9cc6e6 | 15322 | if (!(flags || ecx || xmm0)) |
15323 | emit_note (NOTE_INSN_DELETED); | |
15324 | ||
f0dd3deb | 15325 | DONE; |
15326 | } | |
15327 | [(set_attr "type" "sselog") | |
15328 | (set_attr "prefix_data16" "1") | |
15329 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15330 | (set_attr "ssememalign" "8") |
00a0e418 | 15331 | (set_attr "length_immediate" "1") |
f0dd3deb | 15332 | (set_attr "memory" "none,load") |
15333 | (set_attr "mode" "TI")]) | |
15334 | ||
;; Variant of sse4_2_pcmpestr whose second string (operand 4) is a
;; memory operand wrapped in UNSPEC_LOADU.  The three results and the
;; splitting logic are the same as in sse4_2_pcmpestr; the split passes
;; the bare memory operand 4, without the UNSPEC_LOADU wrapper, to the
;; pcmpestri / pcmpestrm / cconly generators.
538c1aa0 | 15335 | (define_insn_and_split "*sse4_2_pcmpestr_unaligned" |
15336 | [(set (match_operand:SI 0 "register_operand" "=c") | |
15337 | (unspec:SI | |
1a5eff3d | 15338 | [(match_operand:V16QI 2 "register_operand" "x") |
538c1aa0 | 15339 | (match_operand:SI 3 "register_operand" "a") |
15340 | (unspec:V16QI | |
15341 | [(match_operand:V16QI 4 "memory_operand" "m")] | |
00820ea0 | 15342 | UNSPEC_LOADU) |
538c1aa0 | 15343 | (match_operand:SI 5 "register_operand" "d") |
15344 | (match_operand:SI 6 "const_0_to_255_operand" "n")] | |
15345 | UNSPEC_PCMPESTR)) | |
15346 | (set (match_operand:V16QI 1 "register_operand" "=Yz") | |
15347 | (unspec:V16QI | |
15348 | [(match_dup 2) | |
15349 | (match_dup 3) | |
00820ea0 | 15350 | (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU) |
538c1aa0 | 15351 | (match_dup 5) |
15352 | (match_dup 6)] | |
15353 | UNSPEC_PCMPESTR)) | |
15354 | (set (reg:CC FLAGS_REG) | |
15355 | (unspec:CC | |
15356 | [(match_dup 2) | |
15357 | (match_dup 3) | |
00820ea0 | 15358 | (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU) |
538c1aa0 | 15359 | (match_dup 5) |
15360 | (match_dup 6)] | |
15361 | UNSPEC_PCMPESTR))] | |
15362 | "TARGET_SSE4_2 | |
15363 | && can_create_pseudo_p ()" | |
15364 | "#" | |
15365 | "&& 1" | |
15366 | [(const_int 0)] | |
15367 | { | |
15368 | int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); | |
15369 | int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); | |
15370 | int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); | |
15371 | ||
15372 | if (ecx) | |
15373 | emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2], | |
15374 | operands[3], operands[4], | |
15375 | operands[5], operands[6])); | |
15376 | if (xmm0) | |
15377 | emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2], | |
15378 | operands[3], operands[4], | |
15379 | operands[5], operands[6])); | |
15380 | if (flags && !(ecx || xmm0)) | |
15381 | emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL, | |
15382 | operands[2], operands[3], | |
15383 | operands[4], operands[5], | |
15384 | operands[6])); | |
15385 | if (!(flags || ecx || xmm0)) | |
15386 | emit_note (NOTE_INSN_DELETED); | |
15387 | ||
15388 | DONE; | |
15389 | } | |
15390 | [(set_attr "type" "sselog") | |
15391 | (set_attr "prefix_data16" "1") | |
15392 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15393 | (set_attr "ssememalign" "8") |
538c1aa0 | 15394 | (set_attr "length_immediate" "1") |
15395 | (set_attr "memory" "load") | |
15396 | (set_attr "mode" "TI")]) | |
15397 | ||
;; pcmpestri: explicit-length string compare producing an SI result in
;; operand 0 (constraint "c") and setting FLAGS_REG.  Operands 2 and 4
;; use constraints "a" and "d" and do not appear in the output
;; template; only operands 1, 3 and the immediate 5 are printed.
;; Alternative 1 takes the second string from memory.
f0dd3deb | 15398 | (define_insn "sse4_2_pcmpestri" |
15399 | [(set (match_operand:SI 0 "register_operand" "=c,c") | |
15400 | (unspec:SI | |
15401 | [(match_operand:V16QI 1 "register_operand" "x,x") | |
15402 | (match_operand:SI 2 "register_operand" "a,a") | |
15403 | (match_operand:V16QI 3 "nonimmediate_operand" "x,m") | |
15404 | (match_operand:SI 4 "register_operand" "d,d") | |
15405 | (match_operand:SI 5 "const_0_to_255_operand" "n,n")] | |
15406 | UNSPEC_PCMPESTR)) | |
15407 | (set (reg:CC FLAGS_REG) | |
15408 | (unspec:CC | |
15409 | [(match_dup 1) | |
15410 | (match_dup 2) | |
15411 | (match_dup 3) | |
15412 | (match_dup 4) | |
15413 | (match_dup 5)] | |
15414 | UNSPEC_PCMPESTR))] | |
15415 | "TARGET_SSE4_2" | |
ed30e0a6 | 15416 | "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" |
f0dd3deb | 15417 | [(set_attr "type" "sselog") |
15418 | (set_attr "prefix_data16" "1") | |
15419 | (set_attr "prefix_extra" "1") | |
ed30e0a6 | 15420 | (set_attr "prefix" "maybe_vex") |
8c1dfa94 | 15421 | (set_attr "ssememalign" "8") |
00a0e418 | 15422 | (set_attr "length_immediate" "1") |
6470d004 | 15423 | (set_attr "btver2_decode" "vector") |
f0dd3deb | 15424 | (set_attr "memory" "none,load") |
15425 | (set_attr "mode" "TI")]) | |
15426 | ||
;; pcmpestrm: explicit-length string compare producing a V16QI result
;; in operand 0 (constraint "Yz") and setting FLAGS_REG.  The input
;; operands and the printed operands match sse4_2_pcmpestri.
15427 | (define_insn "sse4_2_pcmpestrm" | |
50c9119e | 15428 | [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") |
f0dd3deb | 15429 | (unspec:V16QI |
15430 | [(match_operand:V16QI 1 "register_operand" "x,x") | |
15431 | (match_operand:SI 2 "register_operand" "a,a") | |
15432 | (match_operand:V16QI 3 "nonimmediate_operand" "x,m") | |
15433 | (match_operand:SI 4 "register_operand" "d,d") | |
15434 | (match_operand:SI 5 "const_0_to_255_operand" "n,n")] | |
15435 | UNSPEC_PCMPESTR)) | |
15436 | (set (reg:CC FLAGS_REG) | |
15437 | (unspec:CC | |
15438 | [(match_dup 1) | |
15439 | (match_dup 2) | |
15440 | (match_dup 3) | |
15441 | (match_dup 4) | |
15442 | (match_dup 5)] | |
15443 | UNSPEC_PCMPESTR))] | |
15444 | "TARGET_SSE4_2" | |
ed30e0a6 | 15445 | "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" |
f0dd3deb | 15446 | [(set_attr "type" "sselog") |
15447 | (set_attr "prefix_data16" "1") | |
15448 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15449 | (set_attr "ssememalign" "8") |
00a0e418 | 15450 | (set_attr "length_immediate" "1") |
ed30e0a6 | 15451 | (set_attr "prefix" "maybe_vex") |
6470d004 | 15452 | (set_attr "btver2_decode" "vector") |
f0dd3deb | 15453 | (set_attr "memory" "none,load") |
15454 | (set_attr "mode" "TI")]) | |
15455 | ||
;; Flags-only explicit-length string compare.  Only FLAGS_REG is set;
;; the register result is declared as a clobbered scratch.
;; Alternatives 0 and 1 clobber a V16QI scratch (constraint "Yz") and
;; emit pcmpestrm; alternatives 2 and 3 clobber an SI scratch
;; (constraint "c") and emit pcmpestri.  Odd-numbered alternatives take
;; the second string from memory.
15456 | (define_insn "sse4_2_pcmpestr_cconly" | |
15457 | [(set (reg:CC FLAGS_REG) | |
15458 | (unspec:CC | |
18f95a36 | 15459 | [(match_operand:V16QI 2 "register_operand" "x,x,x,x") |
15460 | (match_operand:SI 3 "register_operand" "a,a,a,a") | |
15461 | (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m") | |
15462 | (match_operand:SI 5 "register_operand" "d,d,d,d") | |
15463 | (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")] | |
f0dd3deb | 15464 | UNSPEC_PCMPESTR)) |
18f95a36 | 15465 | (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X")) |
15466 | (clobber (match_scratch:SI 1 "= X, X,c,c"))] | |
f0dd3deb | 15467 | "TARGET_SSE4_2" |
15468 | "@ | |
ed30e0a6 | 15469 | %vpcmpestrm\t{%6, %4, %2|%2, %4, %6} |
15470 | %vpcmpestrm\t{%6, %4, %2|%2, %4, %6} | |
15471 | %vpcmpestri\t{%6, %4, %2|%2, %4, %6} | |
15472 | %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" | |
f0dd3deb | 15473 | [(set_attr "type" "sselog") |
15474 | (set_attr "prefix_data16" "1") | |
15475 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15476 | (set_attr "ssememalign" "8") |
00a0e418 | 15477 | (set_attr "length_immediate" "1") |
f0dd3deb | 15478 | (set_attr "memory" "none,load,none,load") |
6470d004 | 15479 | (set_attr "btver2_decode" "vector,vector,vector,vector") |
ed30e0a6 | 15480 | (set_attr "prefix" "maybe_vex") |
f0dd3deb | 15481 | (set_attr "mode" "TI")]) |
15482 | ||
;; Combined string compare without explicit length operands.  The
;; pattern describes all three results of UNSPEC_PCMPISTR at once: an
;; SI value in operand 0 (constraint "c"), a V16QI value in operand 1
;; (constraint "Yz") and FLAGS_REG.  Inputs are two V16QI operands
;; (2 and 3) and an immediate (4) accepted by const_0_to_255_operand.
;;
;; It only matches while can_create_pseudo_p () holds and is always
;; split ("&& 1").  The splitter checks REG_UNUSED notes on curr_insn
;; to see which results are live and emits:
;;   - sse4_2_pcmpistri when operand 0 is used,
;;   - sse4_2_pcmpistrm when operand 1 is used,
;;   - sse4_2_pcmpistr_cconly when only the flags are used,
;;   - a NOTE_INSN_DELETED note when nothing is used.
15483 | (define_insn_and_split "sse4_2_pcmpistr" | |
15484 | [(set (match_operand:SI 0 "register_operand" "=c,c") | |
15485 | (unspec:SI | |
1a5eff3d | 15486 | [(match_operand:V16QI 2 "register_operand" "x,x") |
15487 | (match_operand:V16QI 3 "nonimmediate_operand" "x,m") | |
f0dd3deb | 15488 | (match_operand:SI 4 "const_0_to_255_operand" "n,n")] |
15489 | UNSPEC_PCMPISTR)) | |
50c9119e | 15490 | (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz") |
f0dd3deb | 15491 | (unspec:V16QI |
15492 | [(match_dup 2) | |
15493 | (match_dup 3) | |
15494 | (match_dup 4)] | |
15495 | UNSPEC_PCMPISTR)) | |
15496 | (set (reg:CC FLAGS_REG) | |
15497 | (unspec:CC | |
15498 | [(match_dup 2) | |
15499 | (match_dup 3) | |
15500 | (match_dup 4)] | |
15501 | UNSPEC_PCMPISTR))] | |
15502 | "TARGET_SSE4_2 | |
d0b2c064 | 15503 | && can_create_pseudo_p ()" |
f0dd3deb | 15504 | "#" |
15505 | "&& 1" | |
15506 | [(const_int 0)] | |
15507 | { | |
15508 | int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); | |
15509 | int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); | |
15510 | int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); | |
15511 | ||
15512 | if (ecx) | |
15513 | emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2], | |
15514 | operands[3], operands[4])); | |
15515 | if (xmm0) | |
15516 | emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2], | |
15517 | operands[3], operands[4])); | |
15518 | if (flags && !(ecx || xmm0)) | |
18f95a36 | 15519 | emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL, |
15520 | operands[2], operands[3], | |
f0dd3deb | 15521 | operands[4])); |
1c9cc6e6 | 15522 | if (!(flags || ecx || xmm0)) |
15523 | emit_note (NOTE_INSN_DELETED); | |
15524 | ||
f0dd3deb | 15525 | DONE; |
15526 | } | |
15527 | [(set_attr "type" "sselog") | |
15528 | (set_attr "prefix_data16" "1") | |
15529 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15530 | (set_attr "ssememalign" "8") |
00a0e418 | 15531 | (set_attr "length_immediate" "1") |
f0dd3deb | 15532 | (set_attr "memory" "none,load") |
15533 | (set_attr "mode" "TI")]) | |
15534 | ||
;; Variant of sse4_2_pcmpistr whose second string (operand 3) is a
;; memory operand wrapped in UNSPEC_LOADU.  The three results and the
;; splitting logic are the same as in sse4_2_pcmpistr; the split passes
;; the bare memory operand 3, without the UNSPEC_LOADU wrapper, to the
;; pcmpistri / pcmpistrm / cconly generators.
538c1aa0 | 15535 | (define_insn_and_split "*sse4_2_pcmpistr_unaligned" |
15536 | [(set (match_operand:SI 0 "register_operand" "=c") | |
15537 | (unspec:SI | |
1a5eff3d | 15538 | [(match_operand:V16QI 2 "register_operand" "x") |
538c1aa0 | 15539 | (unspec:V16QI |
15540 | [(match_operand:V16QI 3 "memory_operand" "m")] | |
00820ea0 | 15541 | UNSPEC_LOADU) |
538c1aa0 | 15542 | (match_operand:SI 4 "const_0_to_255_operand" "n")] |
15543 | UNSPEC_PCMPISTR)) | |
15544 | (set (match_operand:V16QI 1 "register_operand" "=Yz") | |
15545 | (unspec:V16QI | |
15546 | [(match_dup 2) | |
00820ea0 | 15547 | (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU) |
538c1aa0 | 15548 | (match_dup 4)] |
15549 | UNSPEC_PCMPISTR)) | |
15550 | (set (reg:CC FLAGS_REG) | |
15551 | (unspec:CC | |
15552 | [(match_dup 2) | |
00820ea0 | 15553 | (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU) |
538c1aa0 | 15554 | (match_dup 4)] |
15555 | UNSPEC_PCMPISTR))] | |
15556 | "TARGET_SSE4_2 | |
15557 | && can_create_pseudo_p ()" | |
15558 | "#" | |
15559 | "&& 1" | |
15560 | [(const_int 0)] | |
15561 | { | |
15562 | int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); | |
15563 | int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); | |
15564 | int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); | |
15565 | ||
15566 | if (ecx) | |
15567 | emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2], | |
15568 | operands[3], operands[4])); | |
15569 | if (xmm0) | |
15570 | emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2], | |
15571 | operands[3], operands[4])); | |
15572 | if (flags && !(ecx || xmm0)) | |
15573 | emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL, | |
15574 | operands[2], operands[3], | |
15575 | operands[4])); | |
15576 | if (!(flags || ecx || xmm0)) | |
15577 | emit_note (NOTE_INSN_DELETED); | |
15578 | ||
15579 | DONE; | |
15580 | } | |
15581 | [(set_attr "type" "sselog") | |
15582 | (set_attr "prefix_data16" "1") | |
15583 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15584 | (set_attr "ssememalign" "8") |
538c1aa0 | 15585 | (set_attr "length_immediate" "1") |
15586 | (set_attr "memory" "load") | |
15587 | (set_attr "mode" "TI")]) | |
15588 | ||
;; pcmpistri: string compare producing an SI result in operand 0
;; (constraint "c") and setting FLAGS_REG.  Inputs are two V16QI
;; operands and an immediate accepted by const_0_to_255_operand;
;; alternative 1 takes the second string from memory.
f0dd3deb | 15589 | (define_insn "sse4_2_pcmpistri" |
15590 | [(set (match_operand:SI 0 "register_operand" "=c,c") | |
15591 | (unspec:SI | |
15592 | [(match_operand:V16QI 1 "register_operand" "x,x") | |
15593 | (match_operand:V16QI 2 "nonimmediate_operand" "x,m") | |
15594 | (match_operand:SI 3 "const_0_to_255_operand" "n,n")] | |
15595 | UNSPEC_PCMPISTR)) | |
15596 | (set (reg:CC FLAGS_REG) | |
15597 | (unspec:CC | |
15598 | [(match_dup 1) | |
15599 | (match_dup 2) | |
15600 | (match_dup 3)] | |
15601 | UNSPEC_PCMPISTR))] | |
15602 | "TARGET_SSE4_2" | |
ed30e0a6 | 15603 | "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" |
f0dd3deb | 15604 | [(set_attr "type" "sselog") |
15605 | (set_attr "prefix_data16" "1") | |
15606 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15607 | (set_attr "ssememalign" "8") |
00a0e418 | 15608 | (set_attr "length_immediate" "1") |
ed30e0a6 | 15609 | (set_attr "prefix" "maybe_vex") |
f0dd3deb | 15610 | (set_attr "memory" "none,load") |
6470d004 | 15611 | (set_attr "btver2_decode" "vector") |
f0dd3deb | 15612 | (set_attr "mode" "TI")]) |
15613 | ||
;; pcmpistrm: string compare producing a V16QI result in operand 0
;; (constraint "Yz") and setting FLAGS_REG.  The input operands match
;; sse4_2_pcmpistri.
15614 | (define_insn "sse4_2_pcmpistrm" | |
50c9119e | 15615 | [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") |
f0dd3deb | 15616 | (unspec:V16QI |
15617 | [(match_operand:V16QI 1 "register_operand" "x,x") | |
15618 | (match_operand:V16QI 2 "nonimmediate_operand" "x,m") | |
15619 | (match_operand:SI 3 "const_0_to_255_operand" "n,n")] | |
15620 | UNSPEC_PCMPISTR)) | |
15621 | (set (reg:CC FLAGS_REG) | |
15622 | (unspec:CC | |
15623 | [(match_dup 1) | |
15624 | (match_dup 2) | |
15625 | (match_dup 3)] | |
15626 | UNSPEC_PCMPISTR))] | |
15627 | "TARGET_SSE4_2" | |
ed30e0a6 | 15628 | "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" |
f0dd3deb | 15629 | [(set_attr "type" "sselog") |
15630 | (set_attr "prefix_data16" "1") | |
15631 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15632 | (set_attr "ssememalign" "8") |
00a0e418 | 15633 | (set_attr "length_immediate" "1") |
ed30e0a6 | 15634 | (set_attr "prefix" "maybe_vex") |
f0dd3deb | 15635 | (set_attr "memory" "none,load") |
6470d004 | 15636 | (set_attr "btver2_decode" "vector") |
f0dd3deb | 15637 | (set_attr "mode" "TI")]) |
15638 | ||
;; Flags-only string compare.  Only FLAGS_REG is set; the register
;; result is declared as a clobbered scratch.  Alternatives 0 and 1
;; clobber a V16QI scratch (constraint "Yz") and emit pcmpistrm;
;; alternatives 2 and 3 clobber an SI scratch (constraint "c") and emit
;; pcmpistri.  Odd-numbered alternatives take the second string from
;; memory.
15639 | (define_insn "sse4_2_pcmpistr_cconly" | |
15640 | [(set (reg:CC FLAGS_REG) | |
15641 | (unspec:CC | |
18f95a36 | 15642 | [(match_operand:V16QI 2 "register_operand" "x,x,x,x") |
15643 | (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m") | |
15644 | (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")] | |
f0dd3deb | 15645 | UNSPEC_PCMPISTR)) |
18f95a36 | 15646 | (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X")) |
15647 | (clobber (match_scratch:SI 1 "= X, X,c,c"))] | |
f0dd3deb | 15648 | "TARGET_SSE4_2" |
15649 | "@ | |
ed30e0a6 | 15650 | %vpcmpistrm\t{%4, %3, %2|%2, %3, %4} |
15651 | %vpcmpistrm\t{%4, %3, %2|%2, %3, %4} | |
15652 | %vpcmpistri\t{%4, %3, %2|%2, %3, %4} | |
15653 | %vpcmpistri\t{%4, %3, %2|%2, %3, %4}" | |
f0dd3deb | 15654 | [(set_attr "type" "sselog") |
15655 | (set_attr "prefix_data16" "1") | |
15656 | (set_attr "prefix_extra" "1") | |
8c1dfa94 | 15657 | (set_attr "ssememalign" "8") |
00a0e418 | 15658 | (set_attr "length_immediate" "1") |
f0dd3deb | 15659 | (set_attr "memory" "none,load,none,load") |
ed30e0a6 | 15660 | (set_attr "prefix" "maybe_vex") |
6470d004 | 15661 | (set_attr "btver2_decode" "vector,vector,vector,vector") |
f0dd3deb | 15662 | (set_attr "mode" "TI")]) |
448e99f5 | 15663 | |
0daf3bbe | 15664 | ;; Packed float variants |
;; Maps the index-vector mode of a single-precision gather/scatter
;; prefetch to the mode used for its mem: V8DI -> V8SF, V16SI -> V16SF.
15665 | (define_mode_attr GATHER_SCATTER_SF_MEM_MODE | |
15666 | [(V8DI "V8SF") (V16SI "V16SF")]) | |
15667 | ||
;; Expander for the single-precision gather prefetch.
;;   operand 0: mask (register_or_constm1_operand, <avx512fmaskmode>)
;;   operand 1: VI48_512 index vector register
;;   operand 2: address accepted by vsib_address_operand
;;   operand 3: immediate accepted by const1248_operand
;;   operand 4: immediate accepted by const_2_to_3_operand
;; The preparation code builds operand 5, the address of the mem, as an
;; UNSPEC_VSIBADDR unspec in Pmode over operands 2, 1 and 3.
15668 | (define_expand "avx512pf_gatherpf<mode>sf" | |
d2ff59d6 | 15669 | [(unspec |
15670 | [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") | |
0daf3bbe | 15671 | (mem:<GATHER_SCATTER_SF_MEM_MODE> |
d2ff59d6 | 15672 | (match_par_dup 5 |
15673 | [(match_operand 2 "vsib_address_operand") | |
15674 | (match_operand:VI48_512 1 "register_operand") | |
15675 | (match_operand:SI 3 "const1248_operand")])) | |
3befdeb0 | 15676 | (match_operand:SI 4 "const_2_to_3_operand")] |
d2ff59d6 | 15677 | UNSPEC_GATHER_PREFETCH)] |
15678 | "TARGET_AVX512PF" | |
15679 | { | |
15680 | operands[5] | |
15681 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1], | |
15682 | operands[3]), UNSPEC_VSIBADDR); | |
15683 | }) | |
15684 | ||
;; Single-precision gather prefetch with a mask register (operand 0,
;; constraint "Yk").  Operand 4 selects the mnemonic: 3 emits
;; vgatherpf0...ps and 2 emits vgatherpf1...ps; any other value reaches
;; gcc_unreachable ().  The mask is printed in braces after the memory
;; operand 5.
0daf3bbe | 15685 | (define_insn "*avx512pf_gatherpf<mode>sf_mask" |
d2ff59d6 | 15686 | [(unspec |
a31e7f46 | 15687 | [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk") |
0daf3bbe | 15688 | (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator" |
d2ff59d6 | 15689 | [(unspec:P |
1e662e65 | 15690 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
d2ff59d6 | 15691 | (match_operand:VI48_512 1 "register_operand" "v") |
15692 | (match_operand:SI 3 "const1248_operand" "n")] | |
15693 | UNSPEC_VSIBADDR)]) | |
3befdeb0 | 15694 | (match_operand:SI 4 "const_2_to_3_operand" "n")] |
d2ff59d6 | 15695 | UNSPEC_GATHER_PREFETCH)] |
15696 | "TARGET_AVX512PF" | |
15697 | { | |
15698 | switch (INTVAL (operands[4])) | |
15699 | { | |
3befdeb0 | 15700 | case 3: |
23afdab7 | 15701 | return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}"; |
15702 | case 2: | |
d2ff59d6 | 15703 | return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}"; |
15704 | default: | |
15705 | gcc_unreachable (); | |
15706 | } | |
15707 | } | |
15708 | [(set_attr "type" "sse") | |
15709 | (set_attr "prefix" "evex") | |
15710 | (set_attr "mode" "XI")]) | |
15711 | ||
;; Single-precision gather prefetch whose mask is the constant -1, so
;; no mask register operand exists and none is printed.  Operand 3
;; selects the mnemonic: 3 emits vgatherpf0...ps and 2 emits
;; vgatherpf1...ps; any other value reaches gcc_unreachable ().
0daf3bbe | 15712 | (define_insn "*avx512pf_gatherpf<mode>sf" |
d2ff59d6 | 15713 | [(unspec |
15714 | [(const_int -1) | |
0daf3bbe | 15715 | (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator" |
d2ff59d6 | 15716 | [(unspec:P |
1e662e65 | 15717 | [(match_operand:P 1 "vsib_address_operand" "Tv") |
d2ff59d6 | 15718 | (match_operand:VI48_512 0 "register_operand" "v") |
15719 | (match_operand:SI 2 "const1248_operand" "n")] | |
15720 | UNSPEC_VSIBADDR)]) | |
3befdeb0 | 15721 | (match_operand:SI 3 "const_2_to_3_operand" "n")] |
d2ff59d6 | 15722 | UNSPEC_GATHER_PREFETCH)] |
15723 | "TARGET_AVX512PF" | |
15724 | { | |
15725 | switch (INTVAL (operands[3])) | |
15726 | { | |
3befdeb0 | 15727 | case 3: |
23afdab7 | 15728 | return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}"; |
15729 | case 2: | |
d2ff59d6 | 15730 | return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}"; |
15731 | default: | |
15732 | gcc_unreachable (); | |
15733 | } | |
15734 | } | |
15735 | [(set_attr "type" "sse") | |
15736 | (set_attr "prefix" "evex") | |
15737 | (set_attr "mode" "XI")]) | |
15738 | ||
0daf3bbe | 15739 | ;; Packed double variants |
;; Expander for the double-precision gather prefetch; the mem is always
;; V8DF.
;;   operand 0: mask (register_or_constm1_operand, <avx512fmaskmode>)
;;   operand 1: VI4_256_8_512 index vector register
;;   operand 2: address accepted by vsib_address_operand
;;   operand 3: immediate accepted by const1248_operand
;;   operand 4: immediate accepted by const_2_to_3_operand
;; The preparation code builds operand 5, the address of the mem, as an
;; UNSPEC_VSIBADDR unspec in Pmode over operands 2, 1 and 3.
15740 | (define_expand "avx512pf_gatherpf<mode>df" | |
15741 | [(unspec | |
15742 | [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") | |
15743 | (mem:V8DF | |
15744 | (match_par_dup 5 | |
15745 | [(match_operand 2 "vsib_address_operand") | |
15746 | (match_operand:VI4_256_8_512 1 "register_operand") | |
15747 | (match_operand:SI 3 "const1248_operand")])) | |
3befdeb0 | 15748 | (match_operand:SI 4 "const_2_to_3_operand")] |
0daf3bbe | 15749 | UNSPEC_GATHER_PREFETCH)] |
15750 | "TARGET_AVX512PF" | |
15751 | { | |
15752 | operands[5] | |
15753 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1], | |
15754 | operands[3]), UNSPEC_VSIBADDR); | |
15755 | }) | |
15756 | ||
;; Double-precision gather prefetch with a mask register (operand 0,
;; constraint "Yk").  Operand 4 selects the mnemonic: 3 emits
;; vgatherpf0...pd and 2 emits vgatherpf1...pd; any other value reaches
;; gcc_unreachable ().  The mask is printed in braces after the memory
;; operand 5.
15757 | (define_insn "*avx512pf_gatherpf<mode>df_mask" | |
15758 | [(unspec | |
a31e7f46 | 15759 | [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk") |
0daf3bbe | 15760 | (match_operator:V8DF 5 "vsib_mem_operator" |
15761 | [(unspec:P | |
15762 | [(match_operand:P 2 "vsib_address_operand" "Tv") | |
15763 | (match_operand:VI4_256_8_512 1 "register_operand" "v") | |
15764 | (match_operand:SI 3 "const1248_operand" "n")] | |
15765 | UNSPEC_VSIBADDR)]) | |
3befdeb0 | 15766 | (match_operand:SI 4 "const_2_to_3_operand" "n")] |
0daf3bbe | 15767 | UNSPEC_GATHER_PREFETCH)] |
15768 | "TARGET_AVX512PF" | |
15769 | { | |
15770 | switch (INTVAL (operands[4])) | |
15771 | { | |
3befdeb0 | 15772 | case 3: |
23afdab7 | 15773 | return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}"; |
15774 | case 2: | |
0daf3bbe | 15775 | return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}"; |
15776 | default: | |
15777 | gcc_unreachable (); | |
15778 | } | |
15779 | } | |
15780 | [(set_attr "type" "sse") | |
15781 | (set_attr "prefix" "evex") | |
15782 | (set_attr "mode" "XI")]) | |
15783 | ||
;; Double-precision gather prefetch whose mask is the constant -1, so
;; no mask register operand exists and none is printed.  Operand 3
;; selects the mnemonic: 3 emits vgatherpf0...pd and 2 emits
;; vgatherpf1...pd; any other value reaches gcc_unreachable ().
15784 | (define_insn "*avx512pf_gatherpf<mode>df" | |
15785 | [(unspec | |
15786 | [(const_int -1) | |
15787 | (match_operator:V8DF 4 "vsib_mem_operator" | |
15788 | [(unspec:P | |
15789 | [(match_operand:P 1 "vsib_address_operand" "Tv") | |
15790 | (match_operand:VI4_256_8_512 0 "register_operand" "v") | |
15791 | (match_operand:SI 2 "const1248_operand" "n")] | |
15792 | UNSPEC_VSIBADDR)]) | |
3befdeb0 | 15793 | (match_operand:SI 3 "const_2_to_3_operand" "n")] |
0daf3bbe | 15794 | UNSPEC_GATHER_PREFETCH)] |
15795 | "TARGET_AVX512PF" | |
15796 | { | |
15797 | switch (INTVAL (operands[3])) | |
15798 | { | |
3befdeb0 | 15799 | case 3: |
23afdab7 | 15800 | return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}"; |
15801 | case 2: | |
0daf3bbe | 15802 | return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}"; |
15803 | default: | |
15804 | gcc_unreachable (); | |
15805 | } | |
15806 | } | |
15807 | [(set_attr "type" "sse") | |
15808 | (set_attr "prefix" "evex") | |
15809 | (set_attr "mode" "XI")]) | |
15810 | ||
15811 | ;; Packed float variants | |
;; Expander for the scatter-prefetch pattern (UNSPEC_SCATTER_PREFETCH) over
;; a <GATHER_SCATTER_SF_MEM_MODE> memory reference, enabled for
;; TARGET_AVX512PF.  Operand 0 is either a mask register or the constant -1
;; (register_or_constm1_operand); operand 4 must satisfy const2367_operand.
;; The preparation code builds operands[5], the address of the mem, as a
;; Pmode UNSPEC_VSIBADDR of operands 2, 1 and 3 (in that order).
15812 | (define_expand "avx512pf_scatterpf<mode>sf" | |
d2ff59d6 | 15813 | [(unspec |
15814 | [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") | |
0daf3bbe | 15815 | (mem:<GATHER_SCATTER_SF_MEM_MODE> |
d2ff59d6 | 15816 | (match_par_dup 5 |
15817 | [(match_operand 2 "vsib_address_operand") | |
15818 | (match_operand:VI48_512 1 "register_operand") | |
15819 | (match_operand:SI 3 "const1248_operand")])) | |
d418f1d9 | 15820 | (match_operand:SI 4 "const2367_operand")] |
d2ff59d6 | 15821 | UNSPEC_SCATTER_PREFETCH)] |
15822 | "TARGET_AVX512PF" | |
15823 | { | |
15824 | operands[5] | |
15825 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1], | |
15826 | operands[3]), UNSPEC_VSIBADDR); | |
15827 | }) | |
15828 | ||
;; Masked scatter-prefetch insn for the packed-float memory form.
;; Operand 0 is the mask register ("Yk"); operand 5 is the VSIB memory
;; operator wrapping an UNSPEC_VSIBADDR of operands 2, 1 and 3.
;; Operand 4 (const2367_operand) selects the mnemonic: 3 or 7 emits
;; vscatterpf0...ps, 2 or 6 emits vscatterpf1...ps; the mask is printed
;; as {%0} after the address.
0daf3bbe | 15829 | (define_insn "*avx512pf_scatterpf<mode>sf_mask" |
d2ff59d6 | 15830 | [(unspec |
a31e7f46 | 15831 | [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk") |
0daf3bbe | 15832 | (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator" |
d2ff59d6 | 15833 | [(unspec:P |
1e662e65 | 15834 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
d2ff59d6 | 15835 | (match_operand:VI48_512 1 "register_operand" "v") |
15836 | (match_operand:SI 3 "const1248_operand" "n")] | |
15837 | UNSPEC_VSIBADDR)]) | |
d418f1d9 | 15838 | (match_operand:SI 4 "const2367_operand" "n")] |
d2ff59d6 | 15839 | UNSPEC_SCATTER_PREFETCH)] |
15840 | "TARGET_AVX512PF" | |
15841 | { | |
15842 | switch (INTVAL (operands[4])) | |
15843 | { | |
3befdeb0 | 15844 | case 3: |
d418f1d9 | 15845 | case 7: |
23afdab7 | 15846 | return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}"; |
15847 | case 2: | |
15848 | case 6: | |
d2ff59d6 | 15849 | return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}"; |
15850 | default: | |
15851 | gcc_unreachable (); | |
15852 | } | |
15853 | } | |
15854 | [(set_attr "type" "sse") | |
15855 | (set_attr "prefix" "evex") | |
15856 | (set_attr "mode" "XI")]) | |
15857 | ||
;; Scatter-prefetch insn for the packed-float memory form whose first
;; unspec element is the literal (const_int -1) rather than a mask
;; register, so the output template prints no mask.
;; Operand 4 is the VSIB memory operator wrapping an UNSPEC_VSIBADDR of
;; operands 1, 0 and 2.  Operand 3 (const2367_operand) selects the
;; mnemonic: 3 or 7 emits vscatterpf0...ps, 2 or 6 emits vscatterpf1...ps.
0daf3bbe | 15858 | (define_insn "*avx512pf_scatterpf<mode>sf" |
d2ff59d6 | 15859 | [(unspec |
15860 | [(const_int -1) | |
0daf3bbe | 15861 | (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator" |
d2ff59d6 | 15862 | [(unspec:P |
1e662e65 | 15863 | [(match_operand:P 1 "vsib_address_operand" "Tv") |
d2ff59d6 | 15864 | (match_operand:VI48_512 0 "register_operand" "v") |
15865 | (match_operand:SI 2 "const1248_operand" "n")] | |
15866 | UNSPEC_VSIBADDR)]) | |
d418f1d9 | 15867 | (match_operand:SI 3 "const2367_operand" "n")] |
d2ff59d6 | 15868 | UNSPEC_SCATTER_PREFETCH)] |
15869 | "TARGET_AVX512PF" | |
15870 | { | |
15871 | switch (INTVAL (operands[3])) | |
15872 | { | |
3befdeb0 | 15873 | case 3: |
d418f1d9 | 15874 | case 7: |
23afdab7 | 15875 | return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}"; |
15876 | case 2: | |
15877 | case 6: | |
d2ff59d6 | 15878 | return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}"; |
15879 | default: | |
15880 | gcc_unreachable (); | |
15881 | } | |
15882 | } | |
15883 | [(set_attr "type" "sse") | |
15884 | (set_attr "prefix" "evex") | |
15885 | (set_attr "mode" "XI")]) | |
15886 | ||
0daf3bbe | 15887 | ;; Packed double variants |
;; Expander for the scatter-prefetch pattern (UNSPEC_SCATTER_PREFETCH) over
;; a V8DF memory reference, enabled for TARGET_AVX512PF.  Operand 0 is
;; either a mask register or the constant -1 (register_or_constm1_operand);
;; operand 4 must satisfy const2367_operand.
;; The preparation code builds operands[5], the address of the mem, as a
;; Pmode UNSPEC_VSIBADDR of operands 2, 1 and 3 (in that order).
15888 | (define_expand "avx512pf_scatterpf<mode>df" | |
15889 | [(unspec | |
15890 | [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") | |
15891 | (mem:V8DF | |
15892 | (match_par_dup 5 | |
15893 | [(match_operand 2 "vsib_address_operand") | |
15894 | (match_operand:VI4_256_8_512 1 "register_operand") | |
15895 | (match_operand:SI 3 "const1248_operand")])) | |
d418f1d9 | 15896 | (match_operand:SI 4 "const2367_operand")] |
0daf3bbe | 15897 | UNSPEC_SCATTER_PREFETCH)] |
15898 | "TARGET_AVX512PF" | |
15899 | { | |
15900 | operands[5] | |
15901 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1], | |
15902 | operands[3]), UNSPEC_VSIBADDR); | |
15903 | }) | |
15904 | ||
;; Masked scatter-prefetch insn for the V8DF memory form.
;; Operand 0 is the mask register ("Yk"); operand 5 is the VSIB memory
;; operator wrapping an UNSPEC_VSIBADDR of operands 2, 1 and 3.
;; Operand 4 (const2367_operand) selects the mnemonic: 3 or 7 emits
;; vscatterpf0...pd, 2 or 6 emits vscatterpf1...pd; the mask is printed
;; as {%0} after the address.
15905 | (define_insn "*avx512pf_scatterpf<mode>df_mask" | |
15906 | [(unspec | |
a31e7f46 | 15907 | [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk") |
0daf3bbe | 15908 | (match_operator:V8DF 5 "vsib_mem_operator" |
15909 | [(unspec:P | |
15910 | [(match_operand:P 2 "vsib_address_operand" "Tv") | |
15911 | (match_operand:VI4_256_8_512 1 "register_operand" "v") | |
15912 | (match_operand:SI 3 "const1248_operand" "n")] | |
15913 | UNSPEC_VSIBADDR)]) | |
d418f1d9 | 15914 | (match_operand:SI 4 "const2367_operand" "n")] |
0daf3bbe | 15915 | UNSPEC_SCATTER_PREFETCH)] |
15916 | "TARGET_AVX512PF" | |
15917 | { | |
15918 | switch (INTVAL (operands[4])) | |
15919 | { | |
3befdeb0 | 15920 | case 3: |
d418f1d9 | 15921 | case 7: |
23afdab7 | 15922 | return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}"; |
15923 | case 2: | |
15924 | case 6: | |
0daf3bbe | 15925 | return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}"; |
15926 | default: | |
15927 | gcc_unreachable (); | |
15928 | } | |
15929 | } | |
15930 | [(set_attr "type" "sse") | |
15931 | (set_attr "prefix" "evex") | |
15932 | (set_attr "mode" "XI")]) | |
15933 | ||
;; Scatter-prefetch insn for the V8DF memory form whose first unspec
;; element is the literal (const_int -1) rather than a mask register, so
;; the output template prints no mask.
;; Operand 4 is the VSIB memory operator wrapping an UNSPEC_VSIBADDR of
;; operands 1, 0 and 2.  Operand 3 (const2367_operand) selects the
;; mnemonic: 3 or 7 emits vscatterpf0...pd, 2 or 6 emits vscatterpf1...pd.
15934 | (define_insn "*avx512pf_scatterpf<mode>df" | |
15935 | [(unspec | |
15936 | [(const_int -1) | |
15937 | (match_operator:V8DF 4 "vsib_mem_operator" | |
15938 | [(unspec:P | |
15939 | [(match_operand:P 1 "vsib_address_operand" "Tv") | |
15940 | (match_operand:VI4_256_8_512 0 "register_operand" "v") | |
15941 | (match_operand:SI 2 "const1248_operand" "n")] | |
15942 | UNSPEC_VSIBADDR)]) | |
d418f1d9 | 15943 | (match_operand:SI 3 "const2367_operand" "n")] |
0daf3bbe | 15944 | UNSPEC_SCATTER_PREFETCH)] |
15945 | "TARGET_AVX512PF" | |
15946 | { | |
15947 | switch (INTVAL (operands[3])) | |
15948 | { | |
3befdeb0 | 15949 | case 3: |
d418f1d9 | 15950 | case 7: |
23afdab7 | 15951 | return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}"; |
15952 | case 2: | |
15953 | case 6: | |
0daf3bbe | 15954 | return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}"; |
15955 | default: | |
15956 | gcc_unreachable (); | |
15957 | } | |
15958 | } | |
15959 | [(set_attr "type" "sse") | |
15960 | (set_attr "prefix" "evex") | |
15961 | (set_attr "mode" "XI")]) | |
15962 | ||
;; vexp2 on VF_512 modes, modelled as UNSPEC_EXP2 of operand 1 and enabled
;; for TARGET_AVX512ER.  The pattern name carries the <mask_name> and
;; <round_saeonly_name> substitutions; the matching <mask_operand2> and
;; <round_saeonly_mask_op2> pieces appear in the output template.
fbf4df62 | 15963 | (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>" |
85065932 | 15964 | [(set (match_operand:VF_512 0 "register_operand" "=v") |
15965 | (unspec:VF_512 | |
fbf4df62 | 15966 | [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
85065932 | 15967 | UNSPEC_EXP2))] |
15968 | "TARGET_AVX512ER" | |
fbf4df62 | 15969 | "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
85065932 | 15970 | [(set_attr "prefix" "evex") |
c4f782fd | 15971 | (set_attr "type" "sse") |
85065932 | 15972 | (set_attr "mode" "<MODE>")]) |
15973 | ||
;; vrcp28 on VF_512 modes, modelled as UNSPEC_RCP28 of operand 1 and
;; enabled for TARGET_AVX512ER.  The pattern name carries the
;; <mask_codefor>, <mask_name> and <round_saeonly_name> substitutions; the
;; matching <mask_operand2> and <round_saeonly_mask_op2> pieces appear in
;; the output template.
fbf4df62 | 15974 | (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>" |
85065932 | 15975 | [(set (match_operand:VF_512 0 "register_operand" "=v") |
15976 | (unspec:VF_512 | |
fbf4df62 | 15977 | [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
85065932 | 15978 | UNSPEC_RCP28))] |
15979 | "TARGET_AVX512ER" | |
fbf4df62 | 15980 | "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
85065932 | 15981 | [(set_attr "prefix" "evex") |
c4f782fd | 15982 | (set_attr "type" "sse") |
85065932 | 15983 | (set_attr "mode" "<MODE>")]) |
15984 | ||
;; Scalar-style vrcp28 on VF_128 modes, enabled for TARGET_AVX512ER.
;; The UNSPEC_RCP28 of operand 1 is combined with operand 2 through a
;; vec_merge whose selector is (const_int 1); per RTL vec_merge semantics
;; that takes element 0 from the unspec result and the remaining elements
;; from operand 2.  Emits vrcp28<ssescalarmodesuffix> with three operands.
fbf4df62 | 15985 | (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>" |
15986 | [(set (match_operand:VF_128 0 "register_operand" "=v") | |
15987 | (vec_merge:VF_128 | |
15988 | (unspec:VF_128 | |
15989 | [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] | |
15990 | UNSPEC_RCP28) | |
15991 | (match_operand:VF_128 2 "register_operand" "v") | |
15992 | (const_int 1)))] | |
15993 | "TARGET_AVX512ER" | |
c4f782fd | 15994 | "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}" |
fbf4df62 | 15995 | [(set_attr "length_immediate" "1") |
15996 | (set_attr "prefix" "evex") | |
c4f782fd | 15997 | (set_attr "type" "sse") |
fbf4df62 | 15998 | (set_attr "mode" "<MODE>")]) |
15999 | ||
;; vrsqrt28 on VF_512 modes, modelled as UNSPEC_RSQRT28 of operand 1 and
;; enabled for TARGET_AVX512ER.  The pattern name carries the
;; <mask_codefor>, <mask_name> and <round_saeonly_name> substitutions; the
;; matching <mask_operand2> and <round_saeonly_mask_op2> pieces appear in
;; the output template.
16000 | (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>" | |
85065932 | 16001 | [(set (match_operand:VF_512 0 "register_operand" "=v") |
16002 | (unspec:VF_512 | |
fbf4df62 | 16003 | [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
85065932 | 16004 | UNSPEC_RSQRT28))] |
16005 | "TARGET_AVX512ER" | |
fbf4df62 | 16006 | "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
85065932 | 16007 | [(set_attr "prefix" "evex") |
c4f782fd | 16008 | (set_attr "type" "sse") |
85065932 | 16009 | (set_attr "mode" "<MODE>")]) |
16010 | ||
;; Scalar-style vrsqrt28 on VF_128 modes, enabled for TARGET_AVX512ER.
;; The UNSPEC_RSQRT28 of operand 1 is combined with operand 2 through a
;; vec_merge whose selector is (const_int 1); per RTL vec_merge semantics
;; that takes element 0 from the unspec result and the remaining elements
;; from operand 2.  Emits vrsqrt28<ssescalarmodesuffix> with three operands.
fbf4df62 | 16011 | (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>" |
16012 | [(set (match_operand:VF_128 0 "register_operand" "=v") | |
16013 | (vec_merge:VF_128 | |
16014 | (unspec:VF_128 | |
16015 | [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] | |
16016 | UNSPEC_RSQRT28) | |
16017 | (match_operand:VF_128 2 "register_operand" "v") | |
16018 | (const_int 1)))] | |
16019 | "TARGET_AVX512ER" | |
c4f782fd | 16020 | "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}" |
fbf4df62 | 16021 | [(set_attr "length_immediate" "1") |
c4f782fd | 16022 | (set_attr "type" "sse") |
fbf4df62 | 16023 | (set_attr "prefix" "evex") |
16024 | (set_attr "mode" "<MODE>")]) | |
16025 | ||
18525343 | 16026 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
16027 | ;; | |
16028 | ;; XOP instructions | |
16029 | ;; | |
16030 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
16031 | ||
;; xop_plus iterates a pattern over the plus and ss_plus RTL codes.
;; The macs and madcs code attributes give the mnemonic fragment for each
;; code: "macs"/"madcs" for plus and "macss"/"madcss" for ss_plus.
fd65bafc | 16032 | (define_code_iterator xop_plus [plus ss_plus]) |
16033 | ||
16034 | (define_code_attr macs [(plus "macs") (ss_plus "macss")]) | |
16035 | (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")]) | |
16036 | ||
18525343 | 16037 | ;; XOP parallel integer multiply/add instructions. |
18525343 | 16038 | |
;; Element-wise multiply-accumulate on VI24_128 modes:
;; operand 0 = xop_plus (operand 1 * operand 2, operand 3).
;; Operand 1 is marked commutative ("%") with operand 2; only operand 2
;; may be in memory.  Emits vp<macs> followed by the mode suffix twice.
fd65bafc | 16039 | (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>" |
16040 | [(set (match_operand:VI24_128 0 "register_operand" "=x") | |
16041 | (xop_plus:VI24_128 | |
16042 | (mult:VI24_128 | |
16043 | (match_operand:VI24_128 1 "nonimmediate_operand" "%x") | |
16044 | (match_operand:VI24_128 2 "nonimmediate_operand" "xm")) | |
5093b7f4 | 16045 | (match_operand:VI24_128 3 "register_operand" "x")))] |
e029cd62 | 16046 | "TARGET_XOP" |
fd65bafc | 16047 | "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
18525343 | 16048 | [(set_attr "type" "ssemuladd") |
16049 | (set_attr "mode" "TI")]) | |
16050 | ||
;; Widening multiply-accumulate: elements 0 and 2 of V4SI operands 1 and 2
;; are sign-extended to V2DI, multiplied, and combined with V2DI operand 3
;; using xop_plus.  Emits vp<macs>dql.
fd65bafc | 16051 | (define_insn "xop_p<macs>dql" |
e029cd62 | 16052 | [(set (match_operand:V2DI 0 "register_operand" "=x") |
fd65bafc | 16053 | (xop_plus:V2DI |
18525343 | 16054 | (mult:V2DI |
16055 | (sign_extend:V2DI | |
16056 | (vec_select:V2SI | |
c971711a | 16057 | (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
72b48d28 | 16058 | (parallel [(const_int 0) (const_int 2)]))) |
18525343 | 16059 | (sign_extend:V2DI |
16060 | (vec_select:V2SI | |
fd65bafc | 16061 | (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
72b48d28 | 16062 | (parallel [(const_int 0) (const_int 2)])))) |
5093b7f4 | 16063 | (match_operand:V2DI 3 "register_operand" "x")))] |
e029cd62 | 16064 | "TARGET_XOP" |
fd65bafc | 16065 | "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
18525343 | 16066 | [(set_attr "type" "ssemuladd") |
16067 | (set_attr "mode" "TI")]) | |
16068 | ||
;; Widening multiply-accumulate: elements 1 and 3 of V4SI operands 1 and 2
;; are sign-extended to V2DI, multiplied, and combined with V2DI operand 3
;; using xop_plus.  Emits vp<macs>dqh.
fd65bafc | 16069 | (define_insn "xop_p<macs>dqh" |
e029cd62 | 16070 | [(set (match_operand:V2DI 0 "register_operand" "=x") |
fd65bafc | 16071 | (xop_plus:V2DI |
18525343 | 16072 | (mult:V2DI |
16073 | (sign_extend:V2DI | |
16074 | (vec_select:V2SI | |
c971711a | 16075 | (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
72b48d28 | 16076 | (parallel [(const_int 1) (const_int 3)]))) |
18525343 | 16077 | (sign_extend:V2DI |
16078 | (vec_select:V2SI | |
e029cd62 | 16079 | (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
72b48d28 | 16080 | (parallel [(const_int 1) (const_int 3)])))) |
5093b7f4 | 16081 | (match_operand:V2DI 3 "register_operand" "x")))] |
e029cd62 | 16082 | "TARGET_XOP" |
fd65bafc | 16083 | "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
18525343 | 16084 | [(set_attr "type" "ssemuladd") |
16085 | (set_attr "mode" "TI")]) | |
16086 | ||
18525343 | 16087 | ;; XOP parallel integer multiply/add instructions for the intrinsics |
;; Widening multiply-accumulate: elements 1, 3, 5 and 7 of V8HI operands 1
;; and 2 are sign-extended to V4SI, multiplied, and combined with V4SI
;; operand 3 using xop_plus.  Emits vp<macs>wd.
fd65bafc | 16088 | (define_insn "xop_p<macs>wd" |
e029cd62 | 16089 | [(set (match_operand:V4SI 0 "register_operand" "=x") |
fd65bafc | 16090 | (xop_plus:V4SI |
18525343 | 16091 | (mult:V4SI |
16092 | (sign_extend:V4SI | |
16093 | (vec_select:V4HI | |
c971711a | 16094 | (match_operand:V8HI 1 "nonimmediate_operand" "%x") |
04d95c72 | 16095 | (parallel [(const_int 1) (const_int 3) |
16096 | (const_int 5) (const_int 7)]))) | |
18525343 | 16097 | (sign_extend:V4SI |
16098 | (vec_select:V4HI | |
e029cd62 | 16099 | (match_operand:V8HI 2 "nonimmediate_operand" "xm") |
04d95c72 | 16100 | (parallel [(const_int 1) (const_int 3) |
16101 | (const_int 5) (const_int 7)])))) | |
5093b7f4 | 16102 | (match_operand:V4SI 3 "register_operand" "x")))] |
e029cd62 | 16103 | "TARGET_XOP" |
fd65bafc | 16104 | "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
18525343 | 16105 | [(set_attr "type" "ssemuladd") |
16106 | (set_attr "mode" "TI")]) | |
16107 | ||
;; Multiply-add-accumulate on V8HI inputs producing V4SI.
;; The even elements (0, 2, 4, 6) of operands 1 and 2 are sign-extended and
;; multiplied, likewise the odd elements (1, 3, 5, 7); the two products are
;; added with plus and the sum is combined with V4SI operand 3 using
;; xop_plus.  Emits vp<madcs>wd.
fd65bafc | 16108 | (define_insn "xop_p<madcs>wd" |
e029cd62 | 16109 | [(set (match_operand:V4SI 0 "register_operand" "=x") |
fd65bafc | 16110 | (xop_plus:V4SI |
18525343 | 16111 | (plus:V4SI |
16112 | (mult:V4SI | |
16113 | (sign_extend:V4SI | |
16114 | (vec_select:V4HI | |
c971711a | 16115 | (match_operand:V8HI 1 "nonimmediate_operand" "%x") |
04d95c72 | 16116 | (parallel [(const_int 0) (const_int 2) |
16117 | (const_int 4) (const_int 6)]))) | |
18525343 | 16118 | (sign_extend:V4SI |
16119 | (vec_select:V4HI | |
e029cd62 | 16120 | (match_operand:V8HI 2 "nonimmediate_operand" "xm") |
04d95c72 | 16121 | (parallel [(const_int 0) (const_int 2) |
16122 | (const_int 4) (const_int 6)])))) | |
18525343 | 16123 | (mult:V4SI |
16124 | (sign_extend:V4SI | |
16125 | (vec_select:V4HI | |
16126 | (match_dup 1) | |
04d95c72 | 16127 | (parallel [(const_int 1) (const_int 3) |
16128 | (const_int 5) (const_int 7)]))) | |
18525343 | 16129 | (sign_extend:V4SI |
16130 | (vec_select:V4HI | |
16131 | (match_dup 2) | |
04d95c72 | 16132 | (parallel [(const_int 1) (const_int 3) |
16133 | (const_int 5) (const_int 7)]))))) | |
5093b7f4 | 16134 | (match_operand:V4SI 3 "register_operand" "x")))] |
e029cd62 | 16135 | "TARGET_XOP" |
fd65bafc | 16136 | "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
18525343 | 16137 | [(set_attr "type" "ssemuladd") |
16138 | (set_attr "mode" "TI")]) | |
16139 | ||
16140 | ;; XOP parallel XMM conditional moves | |
;; Conditional move over the V mode iterator, expressed as if_then_else
;; with operand 3 as the condition, operand 1 as the "then" value and
;; operand 2 as the "else" value.  The two alternatives allow at most one
;; memory operand: either operand 2 (first alternative) or operand 3
;; (second alternative).  Emits vpcmov.
6fe5844b | 16141 | (define_insn "xop_pcmov_<mode><avxsizesuffix>" |
16142 | [(set (match_operand:V 0 "register_operand" "=x,x") | |
16143 | (if_then_else:V | |
16144 | (match_operand:V 3 "nonimmediate_operand" "x,m") | |
3efe840b | 16145 | (match_operand:V 1 "register_operand" "x,x") |
16146 | (match_operand:V 2 "nonimmediate_operand" "xm,x")))] | |
e029cd62 | 16147 | "TARGET_XOP" |
49fce50b | 16148 | "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
18525343 | 16149 | [(set_attr "type" "sse4arg")]) |
16150 | ||
16151 | ;; XOP horizontal add/subtract instructions | |
;; Horizontal add of adjacent bytes into halfwords: the even-indexed and
;; odd-indexed bytes of V16QI operand 1 are each widened to V8HI with
;; any_extend (a code iterator defined outside this excerpt) and added.
;; Emits vphadd<u>bw.
fd65bafc | 16152 | (define_insn "xop_phadd<u>bw" |
18525343 | 16153 | [(set (match_operand:V8HI 0 "register_operand" "=x") |
16154 | (plus:V8HI | |
fd65bafc | 16155 | (any_extend:V8HI |
18525343 | 16156 | (vec_select:V8QI |
16157 | (match_operand:V16QI 1 "nonimmediate_operand" "xm") | |
04d95c72 | 16158 | (parallel [(const_int 0) (const_int 2) |
16159 | (const_int 4) (const_int 6) | |
16160 | (const_int 8) (const_int 10) | |
16161 | (const_int 12) (const_int 14)]))) | |
fd65bafc | 16162 | (any_extend:V8HI |
18525343 | 16163 | (vec_select:V8QI |
16164 | (match_dup 1) | |
04d95c72 | 16165 | (parallel [(const_int 1) (const_int 3) |
16166 | (const_int 5) (const_int 7) | |
16167 | (const_int 9) (const_int 11) | |
16168 | (const_int 13) (const_int 15)])))))] | |
18525343 | 16169 | "TARGET_XOP" |
fd65bafc | 16170 | "vphadd<u>bw\t{%1, %0|%0, %1}" |
18525343 | 16171 | [(set_attr "type" "sseiadd1")]) |
16172 | ||
;; Horizontal add of bytes into doublewords: four V4QI selections of V16QI
;; operand 1 (indices 4k, 4k+1, 4k+2 and 4k+3 for k = 0..3) are widened to
;; V4SI with any_extend and summed, so each result element is the sum of
;; four consecutive input bytes.  Emits vphadd<u>bd.
fd65bafc | 16173 | (define_insn "xop_phadd<u>bd" |
18525343 | 16174 | [(set (match_operand:V4SI 0 "register_operand" "=x") |
16175 | (plus:V4SI | |
16176 | (plus:V4SI | |
fd65bafc | 16177 | (any_extend:V4SI |
18525343 | 16178 | (vec_select:V4QI |
16179 | (match_operand:V16QI 1 "nonimmediate_operand" "xm") | |
04d95c72 | 16180 | (parallel [(const_int 0) (const_int 4) |
16181 | (const_int 8) (const_int 12)]))) | |
fd65bafc | 16182 | (any_extend:V4SI |
18525343 | 16183 | (vec_select:V4QI |
16184 | (match_dup 1) | |
04d95c72 | 16185 | (parallel [(const_int 1) (const_int 5) |
16186 | (const_int 9) (const_int 13)])))) | |
18525343 | 16187 | (plus:V4SI |
fd65bafc | 16188 | (any_extend:V4SI |
18525343 | 16189 | (vec_select:V4QI |
16190 | (match_dup 1) | |
04d95c72 | 16191 | (parallel [(const_int 2) (const_int 6) |
16192 | (const_int 10) (const_int 14)]))) | |
fd65bafc | 16193 | (any_extend:V4SI |
18525343 | 16194 | (vec_select:V4QI |
16195 | (match_dup 1) | |
04d95c72 | 16196 | (parallel [(const_int 3) (const_int 7) |
16197 | (const_int 11) (const_int 15)]))))))] | |
18525343 | 16198 | "TARGET_XOP" |
fd65bafc | 16199 | "vphadd<u>bd\t{%1, %0|%0, %1}" |
18525343 | 16200 | [(set_attr "type" "sseiadd1")]) |
16201 | ||
;; Horizontal add of bytes into quadwords: eight V2QI selections of V16QI
;; operand 1 (index pairs (i, i+8) for i = 0..7) are widened to V2DI with
;; any_extend and summed in a tree of plus operations, so result element 0
;; is the sum of bytes 0..7 and element 1 the sum of bytes 8..15.
;; Emits vphadd<u>bq.
fd65bafc | 16202 | (define_insn "xop_phadd<u>bq" |
18525343 | 16203 | [(set (match_operand:V2DI 0 "register_operand" "=x") |
16204 | (plus:V2DI | |
16205 | (plus:V2DI | |
16206 | (plus:V2DI | |
fd65bafc | 16207 | (any_extend:V2DI |
18525343 | 16208 | (vec_select:V2QI |
16209 | (match_operand:V16QI 1 "nonimmediate_operand" "xm") | |
5093b7f4 | 16210 | (parallel [(const_int 0) (const_int 8)]))) |
fd65bafc | 16211 | (any_extend:V2DI |
18525343 | 16212 | (vec_select:V2QI |
16213 | (match_dup 1) | |
5093b7f4 | 16214 | (parallel [(const_int 1) (const_int 9)])))) |
18525343 | 16215 | (plus:V2DI |
fd65bafc | 16216 | (any_extend:V2DI |
18525343 | 16217 | (vec_select:V2QI |
16218 | (match_dup 1) | |
5093b7f4 | 16219 | (parallel [(const_int 2) (const_int 10)]))) |
fd65bafc | 16220 | (any_extend:V2DI |
18525343 | 16221 | (vec_select:V2QI |
16222 | (match_dup 1) | |
5093b7f4 | 16223 | (parallel [(const_int 3) (const_int 11)]))))) |
18525343 | 16224 | (plus:V2DI |
16225 | (plus:V2DI | |
fd65bafc | 16226 | (any_extend:V2DI |
18525343 | 16227 | (vec_select:V2QI |
16228 | (match_dup 1) | |
5093b7f4 | 16229 | (parallel [(const_int 4) (const_int 12)]))) |
fd65bafc | 16230 | (any_extend:V2DI |
18525343 | 16231 | (vec_select:V2QI |
16232 | (match_dup 1) | |
5093b7f4 | 16233 | (parallel [(const_int 5) (const_int 13)])))) |
18525343 | 16234 | (plus:V2DI |
fd65bafc | 16235 | (any_extend:V2DI |
18525343 | 16236 | (vec_select:V2QI |
16237 | (match_dup 1) | |
5093b7f4 | 16238 | (parallel [(const_int 6) (const_int 14)]))) |
fd65bafc | 16239 | (any_extend:V2DI |
18525343 | 16240 | (vec_select:V2QI |
16241 | (match_dup 1) | |
5093b7f4 | 16242 | (parallel [(const_int 7) (const_int 15)])))))))] |
18525343 | 16243 | "TARGET_XOP" |
fd65bafc | 16244 | "vphadd<u>bq\t{%1, %0|%0, %1}" |
18525343 | 16245 | [(set_attr "type" "sseiadd1")]) |
16246 | ||
;; Horizontal add of adjacent halfwords into doublewords: the even-indexed
;; and odd-indexed elements of V8HI operand 1 are each widened to V4SI with
;; any_extend and added.  Emits vphadd<u>wd.
fd65bafc | 16247 | (define_insn "xop_phadd<u>wd" |
18525343 | 16248 | [(set (match_operand:V4SI 0 "register_operand" "=x") |
16249 | (plus:V4SI | |
fd65bafc | 16250 | (any_extend:V4SI |
18525343 | 16251 | (vec_select:V4HI |
16252 | (match_operand:V8HI 1 "nonimmediate_operand" "xm") | |
04d95c72 | 16253 | (parallel [(const_int 0) (const_int 2) |
16254 | (const_int 4) (const_int 6)]))) | |
fd65bafc | 16255 | (any_extend:V4SI |
18525343 | 16256 | (vec_select:V4HI |
16257 | (match_dup 1) | |
04d95c72 | 16258 | (parallel [(const_int 1) (const_int 3) |
16259 | (const_int 5) (const_int 7)])))))] | |
18525343 | 16260 | "TARGET_XOP" |
fd65bafc | 16261 | "vphadd<u>wd\t{%1, %0|%0, %1}" |
18525343 | 16262 | [(set_attr "type" "sseiadd1")]) |
16263 | ||
;; Horizontal add of halfwords into quadwords: four V2HI selections of V8HI
;; operand 1 (index pairs (i, i+4) for i = 0..3) are widened to V2DI with
;; any_extend and summed, so result element 0 is the sum of halfwords 0..3
;; and element 1 the sum of halfwords 4..7.  Emits vphadd<u>wq.
fd65bafc | 16264 | (define_insn "xop_phadd<u>wq" |
18525343 | 16265 | [(set (match_operand:V2DI 0 "register_operand" "=x") |
16266 | (plus:V2DI | |
16267 | (plus:V2DI | |
fd65bafc | 16268 | (any_extend:V2DI |
18525343 | 16269 | (vec_select:V2HI |
16270 | (match_operand:V8HI 1 "nonimmediate_operand" "xm") | |
04d95c72 | 16271 | (parallel [(const_int 0) (const_int 4)]))) |
fd65bafc | 16272 | (any_extend:V2DI |
18525343 | 16273 | (vec_select:V2HI |
16274 | (match_dup 1) | |
04d95c72 | 16275 | (parallel [(const_int 1) (const_int 5)])))) |
18525343 | 16276 | (plus:V2DI |
fd65bafc | 16277 | (any_extend:V2DI |
18525343 | 16278 | (vec_select:V2HI |
16279 | (match_dup 1) | |
04d95c72 | 16280 | (parallel [(const_int 2) (const_int 6)]))) |
fd65bafc | 16281 | (any_extend:V2DI |
18525343 | 16282 | (vec_select:V2HI |
16283 | (match_dup 1) | |
04d95c72 | 16284 | (parallel [(const_int 3) (const_int 7)]))))))] |
18525343 | 16285 | "TARGET_XOP" |
fd65bafc | 16286 | "vphadd<u>wq\t{%1, %0|%0, %1}" |
18525343 | 16287 | [(set_attr "type" "sseiadd1")]) |
16288 | ||
;; Horizontal add of adjacent doublewords into quadwords: elements (0, 2)
;; and (1, 3) of V4SI operand 1 are each widened to V2DI with any_extend
;; and added.  Emits vphadd<u>dq.
fd65bafc | 16289 | (define_insn "xop_phadd<u>dq" |
18525343 | 16290 | [(set (match_operand:V2DI 0 "register_operand" "=x") |
16291 | (plus:V2DI | |
fd65bafc | 16292 | (any_extend:V2DI |
18525343 | 16293 | (vec_select:V2SI |
16294 | (match_operand:V4SI 1 "nonimmediate_operand" "xm") | |
04d95c72 | 16295 | (parallel [(const_int 0) (const_int 2)]))) |
fd65bafc | 16296 | (any_extend:V2DI |
18525343 | 16297 | (vec_select:V2SI |
16298 | (match_dup 1) | |
04d95c72 | 16299 | (parallel [(const_int 1) (const_int 3)])))))] |
18525343 | 16300 | "TARGET_XOP" |
fd65bafc | 16301 | "vphadd<u>dq\t{%1, %0|%0, %1}" |
18525343 | 16302 | [(set_attr "type" "sseiadd1")]) |
16303 | ||
;; Horizontal subtract of adjacent bytes into halfwords: the even-indexed
;; bytes of V16QI operand 1, sign-extended to V8HI, minus the odd-indexed
;; bytes, likewise sign-extended.  Emits vphsubbw.
16304 | (define_insn "xop_phsubbw" | |
16305 | [(set (match_operand:V8HI 0 "register_operand" "=x") | |
16306 | (minus:V8HI | |
16307 | (sign_extend:V8HI | |
16308 | (vec_select:V8QI | |
16309 | (match_operand:V16QI 1 "nonimmediate_operand" "xm") | |
04d95c72 | 16310 | (parallel [(const_int 0) (const_int 2) |
16311 | (const_int 4) (const_int 6) | |
16312 | (const_int 8) (const_int 10) | |
16313 | (const_int 12) (const_int 14)]))) | |
18525343 | 16314 | (sign_extend:V8HI |
16315 | (vec_select:V8QI | |
16316 | (match_dup 1) | |
04d95c72 | 16317 | (parallel [(const_int 1) (const_int 3) |
16318 | (const_int 5) (const_int 7) | |
16319 | (const_int 9) (const_int 11) | |
16320 | (const_int 13) (const_int 15)])))))] | |
18525343 | 16321 | "TARGET_XOP" |
16322 | "vphsubbw\t{%1, %0|%0, %1}" | |
16323 | [(set_attr "type" "sseiadd1")]) | |
16324 | ||
;; Horizontal subtract of adjacent halfwords into doublewords: the
;; even-indexed elements of V8HI operand 1, sign-extended to V4SI, minus
;; the odd-indexed elements, likewise sign-extended.  Emits vphsubwd.
16325 | (define_insn "xop_phsubwd" | |
16326 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
16327 | (minus:V4SI | |
16328 | (sign_extend:V4SI | |
16329 | (vec_select:V4HI | |
16330 | (match_operand:V8HI 1 "nonimmediate_operand" "xm") | |
04d95c72 | 16331 | (parallel [(const_int 0) (const_int 2) |
16332 | (const_int 4) (const_int 6)]))) | |
18525343 | 16333 | (sign_extend:V4SI |
16334 | (vec_select:V4HI | |
16335 | (match_dup 1) | |
04d95c72 | 16336 | (parallel [(const_int 1) (const_int 3) |
16337 | (const_int 5) (const_int 7)])))))] | |
18525343 | 16338 | "TARGET_XOP" |
16339 | "vphsubwd\t{%1, %0|%0, %1}" | |
16340 | [(set_attr "type" "sseiadd1")]) | |
16341 | ||
;; Horizontal subtract of adjacent doublewords into quadwords: elements
;; (0, 2) of V4SI operand 1, sign-extended to V2DI, minus elements (1, 3),
;; likewise sign-extended.  Emits vphsubdq.
16342 | (define_insn "xop_phsubdq" | |
16343 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
16344 | (minus:V2DI | |
16345 | (sign_extend:V2DI | |
16346 | (vec_select:V2SI | |
16347 | (match_operand:V4SI 1 "nonimmediate_operand" "xm") | |
04d95c72 | 16348 | (parallel [(const_int 0) (const_int 2)]))) |
18525343 | 16349 | (sign_extend:V2DI |
16350 | (vec_select:V2SI | |
16351 | (match_dup 1) | |
04d95c72 | 16352 | (parallel [(const_int 1) (const_int 3)])))))] |
18525343 | 16353 | "TARGET_XOP" |
16354 | "vphsubdq\t{%1, %0|%0, %1}" | |
16355 | [(set_attr "type" "sseiadd1")]) | |
16356 | ||
16357 | ;; XOP permute instructions | |
;; vpperm on V16QI, modelled opaquely as UNSPEC_XOP_PERMUTE of operands 1,
;; 2 and 3.  Operand 1 must be a register; the two alternatives plus the
;; insn condition allow at most one of operands 2 and 3 to be in memory.
16358 | (define_insn "xop_pperm" | |
e029cd62 | 16359 | [(set (match_operand:V16QI 0 "register_operand" "=x,x") |
18525343 | 16360 | (unspec:V16QI |
e029cd62 | 16361 | [(match_operand:V16QI 1 "register_operand" "x,x") |
16362 | (match_operand:V16QI 2 "nonimmediate_operand" "x,m") | |
16363 | (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")] | |
18525343 | 16364 | UNSPEC_XOP_PERMUTE))] |
e029cd62 | 16365 | "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" |
18525343 | 16366 | "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
16367 | [(set_attr "type" "sse4arg") | |
16368 | (set_attr "mode" "TI")]) | |
16369 | ||
16370 | ;; XOP pack instructions that combine two vectors into a smaller vector | |
;; Pack two V2DI vectors into one V4SI: the result is the vec_concat of
;; operand 1 and operand 2, each truncated to V2SI.  The insn is emitted as
;; vpperm with V16QI operand 3, which the pattern only records via (use ...)
;; -- presumably the caller supplies a selector that implements the
;; truncation; confirm against the expander that generates this insn.
;; At most one of operands 2 and 3 may be in memory.
16371 | (define_insn "xop_pperm_pack_v2di_v4si" | |
e029cd62 | 16372 | [(set (match_operand:V4SI 0 "register_operand" "=x,x") |
18525343 | 16373 | (vec_concat:V4SI |
16374 | (truncate:V2SI | |
e029cd62 | 16375 | (match_operand:V2DI 1 "register_operand" "x,x")) |
18525343 | 16376 | (truncate:V2SI |
e029cd62 | 16377 | (match_operand:V2DI 2 "nonimmediate_operand" "x,m")))) |
16378 | (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] | |
16379 | "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" | |
18525343 | 16380 | "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
16381 | [(set_attr "type" "sse4arg") | |
16382 | (set_attr "mode" "TI")]) | |
16383 | ||
;; Pack two V4SI vectors into one V8HI: the result is the vec_concat of
;; operand 1 and operand 2, each truncated to V4HI.  The insn is emitted as
;; vpperm with V16QI operand 3, which the pattern only records via (use ...)
;; -- presumably the caller supplies a selector that implements the
;; truncation; confirm against the expander that generates this insn.
;; At most one of operands 2 and 3 may be in memory.
16384 | (define_insn "xop_pperm_pack_v4si_v8hi" | |
e029cd62 | 16385 | [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
18525343 | 16386 | (vec_concat:V8HI |
16387 | (truncate:V4HI | |
e029cd62 | 16388 | (match_operand:V4SI 1 "register_operand" "x,x")) |
18525343 | 16389 | (truncate:V4HI |
e029cd62 | 16390 | (match_operand:V4SI 2 "nonimmediate_operand" "x,m")))) |
16391 | (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] | |
16392 | "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" | |
18525343 | 16393 | "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
16394 | [(set_attr "type" "sse4arg") | |
16395 | (set_attr "mode" "TI")]) | |
16396 | ||
;; Pack two V8HI vectors into one V16QI: the result is the vec_concat of
;; operand 1 and operand 2, each truncated to V8QI.  The insn is emitted as
;; vpperm with V16QI operand 3, which the pattern only records via (use ...)
;; -- presumably the caller supplies a selector that implements the
;; truncation; confirm against the expander that generates this insn.
;; At most one of operands 2 and 3 may be in memory.
16397 | (define_insn "xop_pperm_pack_v8hi_v16qi" | |
e029cd62 | 16398 | [(set (match_operand:V16QI 0 "register_operand" "=x,x") |
18525343 | 16399 | (vec_concat:V16QI |
16400 | (truncate:V8QI | |
e029cd62 | 16401 | (match_operand:V8HI 1 "register_operand" "x,x")) |
18525343 | 16402 | (truncate:V8QI |
e029cd62 | 16403 | (match_operand:V8HI 2 "nonimmediate_operand" "x,m")))) |
16404 | (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] | |
16405 | "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" | |
18525343 | 16406 | "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
16407 | [(set_attr "type" "sse4arg") | |
16408 | (set_attr "mode" "TI")]) | |
16409 | ||
16410 | ;; XOP packed rotate instructions | |
;; Rotate-left expander for VI_128 modes with an SImode count (operand 2).
;; When the count satisfies const_0_to_<sserotatemax>_operand the
;; preparation code does nothing and the (rotate ...) template is emitted.
;; Otherwise the count is converted to <ssescalarmode> if its mode differs,
;; replicated <ssescalarnum> times into a PARALLEL, loaded into a fresh
;; vector register with gen_vec_init<mode>, and the rotate is emitted
;; through gen_xop_vrotl<mode>3 before DONE.
16411 | (define_expand "rotl<mode>3" | |
abd4f58b | 16412 | [(set (match_operand:VI_128 0 "register_operand") |
6fe5844b | 16413 | (rotate:VI_128 |
abd4f58b | 16414 | (match_operand:VI_128 1 "nonimmediate_operand") |
18525343 | 16415 | (match_operand:SI 2 "general_operand")))] |
16416 | "TARGET_XOP" | |
16417 | { | |
16418 | /* If we were given a scalar, convert it to parallel */ | |
16419 | if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) | |
16420 | { | |
16421 | rtvec vs = rtvec_alloc (<ssescalarnum>); | |
16422 | rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); | |
16423 | rtx reg = gen_reg_rtx (<MODE>mode); | |
16424 | rtx op2 = operands[2]; | |
16425 | int i; | |
16426 | ||
16427 | if (GET_MODE (op2) != <ssescalarmode>mode) | |
5deb404d | 16428 | { |
18525343 | 16429 | op2 = gen_reg_rtx (<ssescalarmode>mode); |
16430 | convert_move (op2, operands[2], false); | |
16431 | } | |
16432 | ||
16433 | for (i = 0; i < <ssescalarnum>; i++) | |
16434 | RTVEC_ELT (vs, i) = op2; | |
16435 | ||
16436 | emit_insn (gen_vec_init<mode> (reg, par)); | |
16437 | emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); | |
16438 | DONE; | |
16439 | } | |
16440 | }) | |
16441 | ||
;; Rotate-right expander for VI_128 modes with an SImode count (operand 2).
;; When the count satisfies const_0_to_<sserotatemax>_operand the
;; preparation code does nothing and the (rotatert ...) template is emitted.
;; Otherwise the count is converted to <ssescalarmode> if its mode differs,
;; replicated <ssescalarnum> times into a PARALLEL and loaded into a vector
;; register with gen_vec_init<mode>; that vector is negated with
;; gen_neg<mode>2 and the rotate is emitted through gen_xop_vrotl<mode>3
;; using the negated counts, then DONE.
16442 | (define_expand "rotr<mode>3" | |
abd4f58b | 16443 | [(set (match_operand:VI_128 0 "register_operand") |
6fe5844b | 16444 | (rotatert:VI_128 |
abd4f58b | 16445 | (match_operand:VI_128 1 "nonimmediate_operand") |
18525343 | 16446 | (match_operand:SI 2 "general_operand")))] |
16447 | "TARGET_XOP" | |
16448 | { | |
16449 | /* If we were given a scalar, convert it to parallel */ | |
16450 | if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) | |
16451 | { | |
16452 | rtvec vs = rtvec_alloc (<ssescalarnum>); | |
16453 | rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); | |
16454 | rtx neg = gen_reg_rtx (<MODE>mode); | |
16455 | rtx reg = gen_reg_rtx (<MODE>mode); | |
16456 | rtx op2 = operands[2]; | |
16457 | int i; | |
16458 | ||
16459 | if (GET_MODE (op2) != <ssescalarmode>mode) | |
5deb404d | 16460 | { |
18525343 | 16461 | op2 = gen_reg_rtx (<ssescalarmode>mode); |
16462 | convert_move (op2, operands[2], false); | |
16463 | } | |
16464 | ||
16465 | for (i = 0; i < <ssescalarnum>; i++) | |
16466 | RTVEC_ELT (vs, i) = op2; | |
16467 | ||
16468 | emit_insn (gen_vec_init<mode> (reg, par)); | |
16469 | emit_insn (gen_neg<mode>2 (neg, reg)); | |
16470 | emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); | |
16471 | DONE; | |
16472 | } | |
16473 | }) | |
16474 | ||
;; Rotate-left of VI_128 operand 1 (register or memory) by an immediate
;; count, operand 2, which must satisfy const_0_to_<sserotatemax>_operand.
;; Emits vprot<ssemodesuffix> with the immediate as given.
16475 | (define_insn "xop_rotl<mode>3" | |
6fe5844b | 16476 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16477 | (rotate:VI_128 | |
16478 | (match_operand:VI_128 1 "nonimmediate_operand" "xm") | |
18525343 | 16479 | (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] |
16480 | "TARGET_XOP" | |
63d5e521 | 16481 | "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
18525343 | 16482 | [(set_attr "type" "sseishft") |
16483 | (set_attr "length_immediate" "1") | |
16484 | (set_attr "mode" "TI")]) | |
16485 | ||
;; Rotate-right of VI_128 operand 1 by an immediate count (operand 2).
;; The output code rewrites the count as a left rotate: operands[3] is set
;; to the element bit size of <ssescalarmode> minus operand 2, and
;; vprot<ssemodesuffix> is emitted with operand 3 as the immediate.
16486 | (define_insn "xop_rotr<mode>3" | |
6fe5844b | 16487 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16488 | (rotatert:VI_128 | |
16489 | (match_operand:VI_128 1 "nonimmediate_operand" "xm") | |
18525343 | 16490 | (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] |
16491 | "TARGET_XOP" | |
16492 | { | |
a31cce64 | 16493 | operands[3] |
16494 | = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2])); | |
63d5e521 | 16495 | return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\"; |
18525343 | 16496 | } |
16497 | [(set_attr "type" "sseishft") | |
16498 | (set_attr "length_immediate" "1") | |
16499 | (set_attr "mode" "TI")]) | |
16500 | ||
;; Per-element variable rotate-right expander for VI_128 modes.
;; Negates the count vector (operand 2) into a fresh register with
;; gen_neg<mode>2 and emits gen_xop_vrotl<mode>3 with the negated counts.
16501 | (define_expand "vrotr<mode>3" | |
abd4f58b | 16502 | [(match_operand:VI_128 0 "register_operand") |
16503 | (match_operand:VI_128 1 "register_operand") | |
16504 | (match_operand:VI_128 2 "register_operand")] | |
18525343 | 16505 | "TARGET_XOP" |
16506 | { | |
16507 | rtx reg = gen_reg_rtx (<MODE>mode); | |
16508 | emit_insn (gen_neg<mode>2 (reg, operands[2])); | |
16509 | emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); | |
16510 | DONE; | |
16511 | }) | |
16512 | ||
;; Per-element variable rotate-left expander for VI_128 modes.
;; Forwards all three operands unchanged to gen_xop_vrotl<mode>3.
16513 | (define_expand "vrotl<mode>3" | |
abd4f58b | 16514 | [(match_operand:VI_128 0 "register_operand") |
16515 | (match_operand:VI_128 1 "register_operand") | |
16516 | (match_operand:VI_128 2 "register_operand")] | |
18525343 | 16517 | "TARGET_XOP" |
16518 | { | |
16519 | emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); | |
16520 | DONE; | |
16521 | }) | |
16522 | ||
;; vprot with a vector count (operand 2).  The RTL is an if_then_else on
;; (operand 2 >= 0): when true the value is operand 1 rotated left by
;; operand 2, otherwise operand 1 rotated right by the negated count.
;; Either operand 1 or operand 2 may be memory (one alternative each);
;; the insn condition rejects the case where both are memory.
16523 | (define_insn "xop_vrotl<mode>3" | |
6fe5844b | 16524 | [(set (match_operand:VI_128 0 "register_operand" "=x,x") |
16525 | (if_then_else:VI_128 | |
16526 | (ge:VI_128 | |
16527 | (match_operand:VI_128 2 "nonimmediate_operand" "x,m") | |
18525343 | 16528 | (const_int 0)) |
6fe5844b | 16529 | (rotate:VI_128 |
16530 | (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") | |
18525343 | 16531 | (match_dup 2)) |
6fe5844b | 16532 | (rotatert:VI_128 |
18525343 | 16533 | (match_dup 1) |
6fe5844b | 16534 | (neg:VI_128 (match_dup 2)))))] |
e029cd62 | 16535 | "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
63d5e521 | 16536 | "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
18525343 | 16537 | [(set_attr "type" "sseishft") |
16538 | (set_attr "prefix_data16" "0") | |
16539 | (set_attr "prefix_extra" "2") | |
16540 | (set_attr "mode" "TI")]) | |
16541 | ||
16542 | ;; XOP packed shift instructions. | |
;; Logical right shift of a VI12_128 vector by a per-element count vector;
;; XOP only.  The count (operand 2) is negated into a fresh pseudo and
;; handed to xop_shl<mode>3, whose RTL uses lshiftrt for negative counts.
18525343 | 16543 | (define_expand "vlshr<mode>3" |
abd4f58b | 16544 | [(set (match_operand:VI12_128 0 "register_operand") |
8e73e3ae | 16545 | (lshiftrt:VI12_128 |
abd4f58b | 16546 | (match_operand:VI12_128 1 "register_operand") |
16547 | (match_operand:VI12_128 2 "nonimmediate_operand")))] | |
18525343 | 16548 | "TARGET_XOP" |
16549 | { | |
16550 | rtx neg = gen_reg_rtx (<MODE>mode); | |
16551 | emit_insn (gen_neg<mode>2 (neg, operands[2])); | |
9f689820 | 16552 | emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); |
18525343 | 16553 | DONE; |
16554 | }) | |
16555 | ||
;; Logical right shift of a VI48_128 vector by a per-element count vector,
;; enabled for AVX2 or XOP.  Without AVX2 the preparation code negates the
;; count and emits xop_shl<mode>3, then DONE.  With AVX2 the preparation
;; code emits nothing and does not reach DONE.
7d079352 | 16556 | (define_expand "vlshr<mode>3" |
abd4f58b | 16557 | [(set (match_operand:VI48_128 0 "register_operand") |
7d079352 | 16558 | (lshiftrt:VI48_128 |
abd4f58b | 16559 | (match_operand:VI48_128 1 "register_operand") |
16560 | (match_operand:VI48_128 2 "nonimmediate_operand")))] | |
7d079352 | 16561 | "TARGET_AVX2 || TARGET_XOP" |
16562 | { | |
16563 | if (!TARGET_AVX2) | |
16564 | { | |
16565 | rtx neg = gen_reg_rtx (<MODE>mode); | |
16566 | emit_insn (gen_neg<mode>2 (neg, operands[2])); | |
9f689820 | 16567 | emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); |
7d079352 | 16568 | DONE; |
16569 | } | |
16570 | }) | |
16571 | ||
;; Logical right shift of a VI48_512 vector by a per-element count vector.
;; Enabled by TARGET_AVX512F; there is no preparation code, only the
;; lshiftrt template.
d2ff59d6 | 16572 | (define_expand "vlshr<mode>3" |
16573 | [(set (match_operand:VI48_512 0 "register_operand") | |
16574 | (lshiftrt:VI48_512 | |
16575 | (match_operand:VI48_512 1 "register_operand") | |
16576 | (match_operand:VI48_512 2 "nonimmediate_operand")))] | |
16577 | "TARGET_AVX512F") | |
16578 | ||
;; Logical right shift of a VI48_256 vector by a per-element count vector.
;; Enabled by TARGET_AVX2; there is no preparation code, only the
;; lshiftrt template.
7d079352 | 16579 | (define_expand "vlshr<mode>3" |
abd4f58b | 16580 | [(set (match_operand:VI48_256 0 "register_operand") |
7d079352 | 16581 | (lshiftrt:VI48_256 |
abd4f58b | 16582 | (match_operand:VI48_256 1 "register_operand") |
16583 | (match_operand:VI48_256 2 "nonimmediate_operand")))] | |
7d079352 | 16584 | "TARGET_AVX2") |
16585 | ||
;; Arithmetic right shift of V8HI by a per-element count vector, enabled for
;; XOP or for AVX512BW together with AVX512VL.  When TARGET_XOP is set the
;; preparation code negates the count and emits xop_shav8hi3, then DONE;
;; otherwise it emits nothing itself.
05e7532b | 16586 | (define_expand "vashrv8hi3<mask_name>" |
16587 | [(set (match_operand:V8HI 0 "register_operand") | |
16588 | (ashiftrt:V8HI | |
16589 | (match_operand:V8HI 1 "register_operand") | |
16590 | (match_operand:V8HI 2 "nonimmediate_operand")))] | |
3e41ffb2 | 16591 | "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)" |
18525343 | 16592 | { |
3e41ffb2 | 16593 | if (TARGET_XOP) |
16594 | { | |
05e7532b | 16595 | rtx neg = gen_reg_rtx (V8HImode); |
16596 | emit_insn (gen_negv8hi2 (neg, operands[2])); | |
16597 | emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg)); | |
3e41ffb2 | 16598 | DONE; |
16599 | } | |
16600 | }) | |
16601 | ||
;; Arithmetic right shift of V16QI by a per-element count vector; XOP only.
;; The count is negated into a fresh pseudo and passed to xop_shav16qi3.
05e7532b | 16602 | (define_expand "vashrv16qi3" |
16603 | [(set (match_operand:V16QI 0 "register_operand") | |
16604 | (ashiftrt:V16QI | |
16605 | (match_operand:V16QI 1 "register_operand") | |
16606 | (match_operand:V16QI 2 "nonimmediate_operand")))] | |
16607 | "TARGET_XOP" | |
16608 | { | |
16609 | rtx neg = gen_reg_rtx (V16QImode); | |
16610 | emit_insn (gen_negv16qi2 (neg, operands[2])); | |
16611 | emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg)); | |
16612 | DONE; | |
16613 | }) | |
16614 | ||
;; Arithmetic right shift of V2DI by a per-element count vector, enabled for
;; XOP or AVX512VL.  When TARGET_XOP is set the preparation code negates the
;; count and emits xop_shav2di3, then DONE; otherwise it emits nothing
;; itself.
3e41ffb2 | 16615 | (define_expand "vashrv2di3<mask_name>" |
16616 | [(set (match_operand:V2DI 0 "register_operand") | |
16617 | (ashiftrt:V2DI | |
16618 | (match_operand:V2DI 1 "register_operand") | |
16619 | (match_operand:V2DI 2 "nonimmediate_operand")))] | |
16620 | "TARGET_XOP || TARGET_AVX512VL" | |
16621 | { | |
16622 | if (TARGET_XOP) | |
16623 | { | |
16624 | rtx neg = gen_reg_rtx (V2DImode); | |
16625 | emit_insn (gen_negv2di2 (neg, operands[2])); | |
16626 | emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg)); | |
16627 | DONE; | |
16628 | } | |
18525343 | 16629 | }) |
16630 | ||
;; Arithmetic right shift of V4SI by a per-element count vector, enabled for
;; AVX2 or XOP.  Without AVX2 the preparation code negates the count and
;; emits xop_shav4si3, then DONE; with AVX2 it emits nothing itself.
7d079352 | 16631 | (define_expand "vashrv4si3" |
abd4f58b | 16632 | [(set (match_operand:V4SI 0 "register_operand") |
16633 | (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand") | |
16634 | (match_operand:V4SI 2 "nonimmediate_operand")))] | |
7d079352 | 16635 | "TARGET_AVX2 || TARGET_XOP" |
16636 | { | |
16637 | if (!TARGET_AVX2) | |
16638 | { | |
16639 | rtx neg = gen_reg_rtx (V4SImode); | |
16640 | emit_insn (gen_negv4si2 (neg, operands[2])); | |
9f689820 | 16641 | emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg)); |
7d079352 | 16642 | DONE; |
16643 | } | |
16644 | }) | |
16645 | ||
;; Arithmetic right shift of V16SI by a per-element count vector.
;; Enabled by TARGET_AVX512F; template only, no preparation code.
2344eae2 | 16646 | (define_expand "vashrv16si3" |
16647 | [(set (match_operand:V16SI 0 "register_operand") | |
16648 | (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand") | |
16649 | (match_operand:V16SI 2 "nonimmediate_operand")))] | |
16650 | "TARGET_AVX512F") | |
16651 | ||
;; Arithmetic right shift of V8SI by a per-element count vector.
;; Enabled by TARGET_AVX2; template only, no preparation code.
7d079352 | 16652 | (define_expand "vashrv8si3" |
abd4f58b | 16653 | [(set (match_operand:V8SI 0 "register_operand") |
16654 | (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand") | |
16655 | (match_operand:V8SI 2 "nonimmediate_operand")))] | |
7d079352 | 16656 | "TARGET_AVX2") |
16657 | ||
;; Left shift of a VI12_128 vector by a per-element count vector; XOP only.
;; The operands are forwarded unchanged to xop_sha<mode>3 (no negation is
;; needed for a left shift).
18525343 | 16658 | (define_expand "vashl<mode>3" |
abd4f58b | 16659 | [(set (match_operand:VI12_128 0 "register_operand") |
8e73e3ae | 16660 | (ashift:VI12_128 |
abd4f58b | 16661 | (match_operand:VI12_128 1 "register_operand") |
16662 | (match_operand:VI12_128 2 "nonimmediate_operand")))] | |
18525343 | 16663 | "TARGET_XOP" |
16664 | { | |
9f689820 | 16665 | emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); |
18525343 | 16666 | DONE; |
16667 | }) | |
16668 | ||
;; Left shift of a VI48_128 vector by a per-element count vector, enabled
;; for AVX2 or XOP.  Without AVX2 the count is first forced into a register
;; and xop_sha<mode>3 is emitted, then DONE; with AVX2 the preparation code
;; emits nothing itself.
7d079352 | 16669 | (define_expand "vashl<mode>3" |
abd4f58b | 16670 | [(set (match_operand:VI48_128 0 "register_operand") |
7d079352 | 16671 | (ashift:VI48_128 |
abd4f58b | 16672 | (match_operand:VI48_128 1 "register_operand") |
16673 | (match_operand:VI48_128 2 "nonimmediate_operand")))] | |
7d079352 | 16674 | "TARGET_AVX2 || TARGET_XOP" |
16675 | { | |
16676 | if (!TARGET_AVX2) | |
16677 | { | |
16678 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
9f689820 | 16679 | emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); |
7d079352 | 16680 | DONE; |
16681 | } | |
16682 | }) | |
16683 | ||
;; Left shift of a VI48_512 vector by a per-element count vector.
;; Enabled by TARGET_AVX512F; template only, no preparation code.
d2ff59d6 | 16684 | (define_expand "vashl<mode>3" |
16685 | [(set (match_operand:VI48_512 0 "register_operand") | |
16686 | (ashift:VI48_512 | |
16687 | (match_operand:VI48_512 1 "register_operand") | |
16688 | (match_operand:VI48_512 2 "nonimmediate_operand")))] | |
16689 | "TARGET_AVX512F") | |
16690 | ||
;; Left shift of a VI48_256 vector by a per-element count vector.
;; Enabled by TARGET_AVX2; template only, no preparation code.
7d079352 | 16691 | (define_expand "vashl<mode>3" |
abd4f58b | 16692 | [(set (match_operand:VI48_256 0 "register_operand") |
7d079352 | 16693 | (ashift:VI48_256 |
abd4f58b | 16694 | (match_operand:VI48_256 1 "register_operand") |
16695 | (match_operand:VI48_256 2 "nonimmediate_operand")))] | |
7d079352 | 16696 | "TARGET_AVX2") |
16697 | ||
;; vpsha with a vector count (operand 2).  The RTL is an if_then_else on
;; (operand 2 >= 0): when true the value is operand 1 shifted left by
;; operand 2, otherwise operand 1 arithmetically shifted right by the
;; negated count.  One of operands 1 and 2 may be memory, never both.
9f689820 | 16698 | (define_insn "xop_sha<mode>3" |
6fe5844b | 16699 | [(set (match_operand:VI_128 0 "register_operand" "=x,x") |
16700 | (if_then_else:VI_128 | |
16701 | (ge:VI_128 | |
16702 | (match_operand:VI_128 2 "nonimmediate_operand" "x,m") | |
18525343 | 16703 | (const_int 0)) |
6fe5844b | 16704 | (ashift:VI_128 |
16705 | (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") | |
18525343 | 16706 | (match_dup 2)) |
6fe5844b | 16707 | (ashiftrt:VI_128 |
18525343 | 16708 | (match_dup 1) |
6fe5844b | 16709 | (neg:VI_128 (match_dup 2)))))] |
e029cd62 | 16710 | "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
63d5e521 | 16711 | "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
18525343 | 16712 | [(set_attr "type" "sseishft") |
16713 | (set_attr "prefix_data16" "0") | |
16714 | (set_attr "prefix_extra" "2") | |
16715 | (set_attr "mode" "TI")]) | |
16716 | ||
;; vpshl with a vector count (operand 2).  Same shape as xop_sha<mode>3,
;; except that the negative-count arm is a logical right shift (lshiftrt)
;; by the negated count.  One of operands 1 and 2 may be memory, never both.
9f689820 | 16717 | (define_insn "xop_shl<mode>3" |
6fe5844b | 16718 | [(set (match_operand:VI_128 0 "register_operand" "=x,x") |
16719 | (if_then_else:VI_128 | |
16720 | (ge:VI_128 | |
16721 | (match_operand:VI_128 2 "nonimmediate_operand" "x,m") | |
18525343 | 16722 | (const_int 0)) |
6fe5844b | 16723 | (ashift:VI_128 |
16724 | (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") | |
18525343 | 16725 | (match_dup 2)) |
6fe5844b | 16726 | (lshiftrt:VI_128 |
18525343 | 16727 | (match_dup 1) |
6fe5844b | 16728 | (neg:VI_128 (match_dup 2)))))] |
e029cd62 | 16729 | "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
63d5e521 | 16730 | "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
18525343 | 16731 | [(set_attr "type" "sseishft") |
16732 | (set_attr "prefix_data16" "0") | |
16733 | (set_attr "prefix_extra" "2") | |
16734 | (set_attr "mode" "TI")]) | |
16735 | ||
;; Shift of a byte vector (VI1_AVX512) by a single SImode count (operand 2),
;; for every code in any_shift; enabled by TARGET_SSE2.  Always ends in DONE.
;;
;; XOP path (V16QImode only): a V16QI count vector is built with all 16
;; elements equal to operand 2.  For codes other than ASHIFT the count must
;; be negated: a constant count is negated up front, a non-constant one is
;; negated after the vector is initialised (negv16qi2).  xop_shlv16qi3 is
;; used for LSHIFTRT, xop_shav16qi3 for the other codes.
;;
;; Every other case is delegated to ix86_expand_vecop_qihi.
;;
;; NOTE(review): operand 2 is matched as SImode but is placed directly as
;; the elements of a V16QImode initializer -- confirm gen_vec_initv16qi
;; accepts that for non-constant counts.
1f983100 | 16736 | (define_expand "<shift_insn><mode>3" |
201f262d | 16737 | [(set (match_operand:VI1_AVX512 0 "register_operand") |
16738 | (any_shift:VI1_AVX512 | |
16739 | (match_operand:VI1_AVX512 1 "register_operand") | |
abd4f58b | 16740 | (match_operand:SI 2 "nonmemory_operand")))] |
1f983100 | 16741 | "TARGET_SSE2" |
18525343 | 16742 | { |
1f983100 | 16743 | if (TARGET_XOP && <MODE>mode == V16QImode) |
16744 | { | |
16745 | bool negate = false; | |
16746 | rtx (*gen) (rtx, rtx, rtx); | |
16747 | rtx tmp, par; | |
16748 | int i; | |
885c8b76 | 16749 | |
1f983100 | 16750 | if (<CODE> != ASHIFT) |
16751 | { | |
16752 | if (CONST_INT_P (operands[2])) | |
16753 | operands[2] = GEN_INT (-INTVAL (operands[2])); | |
16754 | else | |
16755 | negate = true; | |
16756 | } | |
16757 | par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); | |
16758 | for (i = 0; i < 16; i++) | |
16759 | XVECEXP (par, 0, i) = operands[2]; | |
18525343 | 16760 | |
1f983100 | 16761 | tmp = gen_reg_rtx (V16QImode); |
16762 | emit_insn (gen_vec_initv16qi (tmp, par)); | |
18525343 | 16763 | |
1f983100 | 16764 | if (negate) |
16765 | emit_insn (gen_negv16qi2 (tmp, tmp)); | |
885c8b76 | 16766 | |
1f983100 | 16767 | gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3); |
16768 | emit_insn (gen (operands[0], operands[1], tmp)); | |
16769 | } | |
18525343 | 16770 | else |
1f983100 | 16771 | ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]); |
18525343 | 16772 | DONE; |
16773 | }) | |
16774 | ||
;; Arithmetic right shift of V2DI by a single DImode count (operand 2),
;; enabled for XOP or AVX512VL.
;; Without AVX512VL the preparation code builds a V2DI vector whose two
;; elements are the count (a constant is negated up front, a non-constant
;; is negated after vec_init via negv2di2), emits xop_shav2di3 with it and
;; finishes with DONE.  With AVX512VL the preparation code emits nothing
;; itself and does not reach DONE.
16775 | (define_expand "ashrv2di3" | |
abd4f58b | 16776 | [(set (match_operand:V2DI 0 "register_operand") |
3297e0a4 | 16777 | (ashiftrt:V2DI |
abd4f58b | 16778 | (match_operand:V2DI 1 "register_operand") |
16779 | (match_operand:DI 2 "nonmemory_operand")))] | |
4f545baf | 16780 | "TARGET_XOP || TARGET_AVX512VL" |
18525343 | 16781 | { |
4f545baf | 16782 | if (!TARGET_AVX512VL) |
16783 | { | |
16784 | rtx reg = gen_reg_rtx (V2DImode); | |
16785 | rtx par; | |
16786 | bool negate = false; | |
16787 | int i; | |
18525343 | 16788 | |
4f545baf | 16789 | if (CONST_INT_P (operands[2])) |
16790 | operands[2] = GEN_INT (-INTVAL (operands[2])); | |
16791 | else | |
16792 | negate = true; | |
885c8b76 | 16793 | |
4f545baf | 16794 | par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2)); |
16795 | for (i = 0; i < 2; i++) | |
16796 | XVECEXP (par, 0, i) = operands[2]; | |
18525343 | 16797 | |
4f545baf | 16798 | emit_insn (gen_vec_initv2di (reg, par)); |
885c8b76 | 16799 | |
4f545baf | 16800 | if (negate) |
16801 | emit_insn (gen_negv2di2 (reg, reg)); | |
885c8b76 | 16802 | |
4f545baf | 16803 | emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg)); |
16804 | DONE; | |
16805 | } | |
18525343 | 16806 | }) |
16807 | ||
16808 | ;; XOP FRCZ support | |
;; vfrcz on a full FMAMODE value: operand 0 is set to UNSPEC_FRCZ applied
;; to operand 1 (register or memory).  Enabled by TARGET_XOP.
18525343 | 16809 | (define_insn "xop_frcz<mode>2" |
a2f9d5b3 | 16810 | [(set (match_operand:FMAMODE 0 "register_operand" "=x") |
16811 | (unspec:FMAMODE | |
16812 | [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")] | |
18525343 | 16813 | UNSPEC_FRCZ))] |
16814 | "TARGET_XOP" | |
0061967e | 16815 | "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}" |
18525343 | 16816 | [(set_attr "type" "ssecvt1") |
16817 | (set_attr "mode" "<MODE>")]) | |
16818 | ||
;; Expander for the scalar (vec_merge, mask 1) form of vfrcz.  The
;; preparation code sets operands[2] to the zero constant of the vector
;; mode, so the merge is between the UNSPEC_FRCZ result and zero.
a2f9d5b3 | 16819 | (define_expand "xop_vmfrcz<mode>2" |
6fe5844b | 16820 | [(set (match_operand:VF_128 0 "register_operand") |
16821 | (vec_merge:VF_128 | |
16822 | (unspec:VF_128 | |
16823 | [(match_operand:VF_128 1 "nonimmediate_operand")] | |
18525343 | 16824 | UNSPEC_FRCZ) |
ea47f46c | 16825 | (match_dup 2) |
18525343 | 16826 | (const_int 1)))] |
16827 | "TARGET_XOP" | |
ea47f46c | 16828 | "operands[2] = CONST0_RTX (<MODE>mode);") |
18525343 | 16829 | |
;; Insn for the scalar form of vfrcz: a vec_merge with mask 1 of the
;; UNSPEC_FRCZ of operand 1 and an operand 2 that must satisfy
;; const0_operand.  Operand 2 does not appear in the output template.
96188d90 | 16830 | (define_insn "*xop_vmfrcz<mode>2" |
6fe5844b | 16831 | [(set (match_operand:VF_128 0 "register_operand" "=x") |
16832 | (vec_merge:VF_128 | |
16833 | (unspec:VF_128 | |
16834 | [(match_operand:VF_128 1 "nonimmediate_operand" "xm")] | |
a2f9d5b3 | 16835 | UNSPEC_FRCZ) |
6fe5844b | 16836 | (match_operand:VF_128 2 "const0_operand") |
a2f9d5b3 | 16837 | (const_int 1)))] |
18525343 | 16838 | "TARGET_XOP" |
c358a059 | 16839 | "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}" |
18525343 | 16840 | [(set_attr "type" "ssecvt1") |
16841 | (set_attr "mode" "<MODE>")]) | |
16842 | ||
;; vpcom comparison of two VI_128 vectors.  Operand 1 is the comparison
;; operator itself (any code accepted by ix86_comparison_int_operator);
;; it is printed into the mnemonic through %Y1.  Operand 2 must be a
;; register, operand 3 may be memory.
16843 | (define_insn "xop_maskcmp<mode>3" | |
6fe5844b | 16844 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16845 | (match_operator:VI_128 1 "ix86_comparison_int_operator" | |
16846 | [(match_operand:VI_128 2 "register_operand" "x") | |
16847 | (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] | |
18525343 | 16848 | "TARGET_XOP" |
63d5e521 | 16849 | "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" |
18525343 | 16850 | [(set_attr "type" "sse4arg") |
16851 | (set_attr "prefix_data16" "0") | |
16852 | (set_attr "prefix_rep" "0") | |
16853 | (set_attr "prefix_extra" "2") | |
16854 | (set_attr "length_immediate" "1") | |
16855 | (set_attr "mode" "TI")]) | |
16856 | ||
;; Unsigned counterpart of xop_maskcmp<mode>3: the operator must satisfy
;; ix86_comparison_uns_operator and the mnemonic carries a "u" before the
;; element-size suffix.
16857 | (define_insn "xop_maskcmp_uns<mode>3" | |
6fe5844b | 16858 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16859 | (match_operator:VI_128 1 "ix86_comparison_uns_operator" | |
16860 | [(match_operand:VI_128 2 "register_operand" "x") | |
16861 | (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] | |
18525343 | 16862 | "TARGET_XOP" |
63d5e521 | 16863 | "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" |
18525343 | 16864 | [(set_attr "type" "ssecmp") |
16865 | (set_attr "prefix_data16" "0") | |
16866 | (set_attr "prefix_rep" "0") | |
16867 | (set_attr "prefix_extra" "2") | |
16868 | (set_attr "length_immediate" "1") | |
16869 | (set_attr "mode" "TI")]) | |
16870 | ||
16871 | ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* | |
16872 | ;; and pcomneu* not to be converted to the signed ones in case somebody needs | |
16873 | ;; the exact instruction generated for the intrinsic. | |
;; The unsigned comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP; the
;; output template is the same as in xop_maskcmp_uns<mode>3.
16874 | (define_insn "xop_maskcmp_uns2<mode>3" | |
6fe5844b | 16875 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16876 | (unspec:VI_128 | |
16877 | [(match_operator:VI_128 1 "ix86_comparison_uns_operator" | |
16878 | [(match_operand:VI_128 2 "register_operand" "x") | |
16879 | (match_operand:VI_128 3 "nonimmediate_operand" "xm")])] | |
18525343 | 16880 | UNSPEC_XOP_UNSIGNED_CMP))] |
16881 | "TARGET_XOP" | |
63d5e521 | 16882 | "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" |
18525343 | 16883 | [(set_attr "type" "ssecmp") |
16884 | (set_attr "prefix_data16" "0") | |
16885 | (set_attr "prefix_extra" "2") | |
16886 | (set_attr "length_immediate" "1") | |
16887 | (set_attr "mode" "TI")]) | |
16888 | ||
16889 | ;; Pcomtrue and pcomfalse support. These are useless instructions, but are | |
16890 | ;; being added here to be complete. | |
;; Operand 3 is a constant selector: a nonzero value prints vpcomtrue,
;; zero prints vpcomfalse.  Operands 1 and 2 are the two vector inputs.
16891 | (define_insn "xop_pcom_tf<mode>3" | |
6fe5844b | 16892 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16893 | (unspec:VI_128 | |
16894 | [(match_operand:VI_128 1 "register_operand" "x") | |
16895 | (match_operand:VI_128 2 "nonimmediate_operand" "xm") | |
18525343 | 16896 | (match_operand:SI 3 "const_int_operand" "n")] |
16897 | UNSPEC_XOP_TRUEFALSE))] | |
16898 | "TARGET_XOP" | |
16899 | { | |
16900 | return ((INTVAL (operands[3]) != 0) | |
63d5e521 | 16901 | ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
16902 | : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"); | |
18525343 | 16903 | } |
16904 | [(set_attr "type" "ssecmp") | |
16905 | (set_attr "prefix_data16" "0") | |
16906 | (set_attr "prefix_extra" "2") | |
16907 | (set_attr "length_immediate" "1") | |
16908 | (set_attr "mode" "TI")]) | |
16909 | ||
;; vpermil2ps/vpermil2pd: operands 1 and 2 are the two floating-point
;; source vectors, operand 3 is the integer selector vector (mode
;; <sseintvecmode>, register or memory) and operand 4 is a 2-bit immediate.
;; Operand 2 must not carry the "%" commutative modifier: "%" would pair it
;; with operand 3, which has a different mode and a different role, so the
;; two can never be exchanged.  A memory operand 2 is therefore reloaded
;; into a register instead.
46fd9685 | 16910 | (define_insn "xop_vpermil2<mode>3" |
6a3f5f59 | 16911 | [(set (match_operand:VF_128_256 0 "register_operand" "=x") |
16912 | (unspec:VF_128_256 | |
16913 | [(match_operand:VF_128_256 1 "register_operand" "x") | |
16914 | (match_operand:VF_128_256 2 "nonimmediate_operand" "x") | |
63d5e521 | 16915 | (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm") |
46fd9685 | 16916 | (match_operand:SI 4 "const_0_to_3_operand" "n")] |
16917 | UNSPEC_VPERMIL2))] | |
16918 | "TARGET_XOP" | |
0061967e | 16919 | "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}" |
46fd9685 | 16920 | [(set_attr "type" "sse4arg") |
16921 | (set_attr "length_immediate" "1") | |
16922 | (set_attr "mode" "<MODE>")]) | |
16923 | ||
18525343 | 16924 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
ed30e0a6 | 16925 | |
;; UNSPEC_AESENC of two V2DI operands; enabled by TARGET_AES.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and prints
;; the two-operand aesenc; alternative 1 (isa avx) prints the
;; three-operand vaesenc.  Operand 2 may be memory in both.
1d9ef704 | 16926 | (define_insn "aesenc" |
f6c74054 | 16927 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16928 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16929 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] | |
1d9ef704 | 16930 | UNSPEC_AESENC))] |
16931 | "TARGET_AES" | |
f6c74054 | 16932 | "@ |
16933 | aesenc\t{%2, %0|%0, %2} | |
16934 | vaesenc\t{%2, %1, %0|%0, %1, %2}" | |
16935 | [(set_attr "isa" "noavx,avx") | |
16936 | (set_attr "type" "sselog1") | |
00a0e418 | 16937 | (set_attr "prefix_extra" "1") |
f6c74054 | 16938 | (set_attr "prefix" "orig,vex") |
6470d004 | 16939 | (set_attr "btver2_decode" "double,double") |
ed30e0a6 | 16940 | (set_attr "mode" "TI")]) |
16941 | ||
;; UNSPEC_AESENCLAST of two V2DI operands; enabled by TARGET_AES.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and prints
;; aesenclast; alternative 1 (isa avx) prints the three-operand
;; vaesenclast.
1d9ef704 | 16942 | (define_insn "aesenclast" |
f6c74054 | 16943 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16944 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16945 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] | |
1d9ef704 | 16946 | UNSPEC_AESENCLAST))] |
16947 | "TARGET_AES" | |
f6c74054 | 16948 | "@ |
16949 | aesenclast\t{%2, %0|%0, %2} | |
16950 | vaesenclast\t{%2, %1, %0|%0, %1, %2}" | |
16951 | [(set_attr "isa" "noavx,avx") | |
16952 | (set_attr "type" "sselog1") | |
00a0e418 | 16953 | (set_attr "prefix_extra" "1") |
f6c74054 | 16954 | (set_attr "prefix" "orig,vex") |
6470d004 | 16955 | (set_attr "btver2_decode" "double,double") |
ed30e0a6 | 16956 | (set_attr "mode" "TI")]) |
16957 | ||
;; UNSPEC_AESDEC of two V2DI operands; enabled by TARGET_AES.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and prints
;; aesdec; alternative 1 (isa avx) prints the three-operand vaesdec.
1d9ef704 | 16958 | (define_insn "aesdec" |
f6c74054 | 16959 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16960 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16961 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] | |
1d9ef704 | 16962 | UNSPEC_AESDEC))] |
16963 | "TARGET_AES" | |
f6c74054 | 16964 | "@ |
16965 | aesdec\t{%2, %0|%0, %2} | |
16966 | vaesdec\t{%2, %1, %0|%0, %1, %2}" | |
16967 | [(set_attr "isa" "noavx,avx") | |
16968 | (set_attr "type" "sselog1") | |
00a0e418 | 16969 | (set_attr "prefix_extra" "1") |
f6c74054 | 16970 | (set_attr "prefix" "orig,vex") |
6470d004 | 16971 | (set_attr "btver2_decode" "double,double") |
ed30e0a6 | 16972 | (set_attr "mode" "TI")]) |
16973 | ||
;; UNSPEC_AESDECLAST of two V2DI operands; enabled by TARGET_AES.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and prints
;; aesdeclast; alternative 1 (isa avx) prints the three-operand
;; vaesdeclast.
1d9ef704 | 16974 | (define_insn "aesdeclast" |
f6c74054 | 16975 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16976 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16977 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] | |
1d9ef704 | 16978 | UNSPEC_AESDECLAST))] |
16979 | "TARGET_AES" | |
f6c74054 | 16980 | "@ |
16981 | aesdeclast\t{%2, %0|%0, %2} | |
16982 | vaesdeclast\t{%2, %1, %0|%0, %1, %2}" | |
16983 | [(set_attr "isa" "noavx,avx") | |
16984 | (set_attr "type" "sselog1") | |
1d9ef704 | 16985 | (set_attr "prefix_extra" "1") |
f6c74054 | 16986 | (set_attr "prefix" "orig,vex") |
6470d004 | 16987 | (set_attr "btver2_decode" "double,double") |
1d9ef704 | 16988 | (set_attr "mode" "TI")]) |
16989 | ||
;; UNSPEC_AESIMC of a single V2DI operand (register or memory); enabled by
;; TARGET_AES.  One alternative; the template starts with %v and the
;; "prefix" attribute is maybe_vex.
16990 | (define_insn "aesimc" | |
16991 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
16992 | (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")] | |
16993 | UNSPEC_AESIMC))] | |
16994 | "TARGET_AES" | |
ed30e0a6 | 16995 | "%vaesimc\t{%1, %0|%0, %1}" |
1d9ef704 | 16996 | [(set_attr "type" "sselog1") |
16997 | (set_attr "prefix_extra" "1") | |
ed30e0a6 | 16998 | (set_attr "prefix" "maybe_vex") |
1d9ef704 | 16999 | (set_attr "mode" "TI")]) |
17000 | ||
;; UNSPEC_AESKEYGENASSIST of a V2DI operand (register or memory) and an
;; 8-bit immediate (operand 2, 0..255); enabled by TARGET_AES.
;; One alternative; the template starts with %v and the "prefix" attribute
;; is maybe_vex.
17001 | (define_insn "aeskeygenassist" | |
17002 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
17003 | (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm") | |
17004 | (match_operand:SI 2 "const_0_to_255_operand" "n")] | |
17005 | UNSPEC_AESKEYGENASSIST))] | |
17006 | "TARGET_AES" | |
ed30e0a6 | 17007 | "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" |
1d9ef704 | 17008 | [(set_attr "type" "sselog1") |
17009 | (set_attr "prefix_extra" "1") | |
00a0e418 | 17010 | (set_attr "length_immediate" "1") |
ed30e0a6 | 17011 | (set_attr "prefix" "maybe_vex") |
1d9ef704 | 17012 | (set_attr "mode" "TI")]) |
17013 | ||
;; UNSPEC_PCLMUL of two V2DI operands and an 8-bit immediate (operand 3);
;; enabled by TARGET_PCLMUL.  Alternative 0 (isa noavx) ties operand 1 to
;; the destination and prints pclmulqdq; alternative 1 (isa avx) prints the
;; three-source-operand vpclmulqdq.
17014 | (define_insn "pclmulqdq" | |
f6c74054 | 17015 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
17016 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
17017 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm") | |
17018 | (match_operand:SI 3 "const_0_to_255_operand" "n,n")] | |
1d9ef704 | 17019 | UNSPEC_PCLMUL))] |
17020 | "TARGET_PCLMUL" | |
f6c74054 | 17021 | "@ |
17022 | pclmulqdq\t{%3, %2, %0|%0, %2, %3} | |
17023 | vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
17024 | [(set_attr "isa" "noavx,avx") | |
17025 | (set_attr "type" "sselog1") | |
1d9ef704 | 17026 | (set_attr "prefix_extra" "1") |
00a0e418 | 17027 | (set_attr "length_immediate" "1") |
f6c74054 | 17028 | (set_attr "prefix" "orig,vex") |
1d9ef704 | 17029 | (set_attr "mode" "TI")]) |
ed30e0a6 | 17030 | |
;; Expander for vzeroall.  The preparation code builds operands[0] as a
;; PARALLEL of nregs + 1 elements, where nregs is 16 for TARGET_64BIT and 8
;; otherwise: element 0 is the UNSPECV_VZEROALL unspec_volatile, and each
;; following element sets one SSE register, viewed as V8SImode, to zero.
17031 | (define_expand "avx_vzeroall" | |
17032 | [(match_par_dup 0 [(const_int 0)])] | |
17033 | "TARGET_AVX" | |
17034 | { | |
17035 | int nregs = TARGET_64BIT ? 16 : 8; | |
17036 | int regno; | |
17037 | ||
17038 | operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1)); | |
17039 | ||
17040 | XVECEXP (operands[0], 0, 0) | |
17041 | = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx), | |
17042 | UNSPECV_VZEROALL); | |
17043 | ||
17044 | for (regno = 0; regno < nregs; regno++) | |
17045 | XVECEXP (operands[0], 0, regno + 1) | |
d1f9b275 | 17046 | = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)), |
ed30e0a6 | 17047 | CONST0_RTX (V8SImode)); |
17048 | }) | |
17049 | ||
;; Insn matching a parallel accepted by the vzeroall_operation predicate
;; whose first element is the UNSPECV_VZEROALL unspec_volatile.  Prints
;; vzeroall; attributes mark it as having no modrm byte and no memory
;; operand.
17050 | (define_insn "*avx_vzeroall" | |
17051 | [(match_parallel 0 "vzeroall_operation" | |
e2c0f47e | 17052 | [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])] |
ed30e0a6 | 17053 | "TARGET_AVX" |
17054 | "vzeroall" | |
17055 | [(set_attr "type" "sse") | |
00a0e418 | 17056 | (set_attr "modrm" "0") |
ed30e0a6 | 17057 | (set_attr "memory" "none") |
17058 | (set_attr "prefix" "vex") | |
6470d004 | 17059 | (set_attr "btver2_decode" "vector") |
ed30e0a6 | 17060 | (set_attr "mode" "OI")]) |
17061 | ||
3970ad84 | 17062 | ;; Clear the upper 128bits of AVX registers, equivalent to a NOP |
17063 | ;; if the upper 128bits are unused. | |
;; Modelled as a bare UNSPECV_VZEROUPPER unspec_volatile with no operands;
;; attributes mark it as having no modrm byte and no memory operand.
17064 | (define_insn "avx_vzeroupper" | |
64f28d78 | 17065 | [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)] |
e2c0f47e | 17066 | "TARGET_AVX" |
ed30e0a6 | 17067 | "vzeroupper" |
17068 | [(set_attr "type" "sse") | |
00a0e418 | 17069 | (set_attr "modrm" "0") |
ed30e0a6 | 17070 | (set_attr "memory" "none") |
17071 | (set_attr "prefix" "vex") | |
6470d004 | 17072 | (set_attr "btver2_decode" "vector") |
ed30e0a6 | 17073 | (set_attr "mode" "OI")]) |
17074 | ||
;; vpbroadcast: duplicate element 0 of operand 1 into every element of the
;; VI destination.  Operand 1 has mode <ssexmmmode> and may be a register
;; or memory.  Enabled by TARGET_AVX2.
5deb404d | 17075 | (define_insn "avx2_pbroadcast<mode>" |
17076 | [(set (match_operand:VI 0 "register_operand" "=x") | |
17077 | (vec_duplicate:VI | |
17078 | (vec_select:<ssescalarmode> | |
03ae25dc | 17079 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm") |
5deb404d | 17080 | (parallel [(const_int 0)]))))] |
17081 | "TARGET_AVX2" | |
c358a059 | 17082 | "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}" |
5deb404d | 17083 | [(set_attr "type" "ssemov") |
17084 | (set_attr "prefix_extra" "1") | |
17085 | (set_attr "prefix" "vex") | |
17086 | (set_attr "mode" "<sseinsnmode>")]) | |
17087 | ||
;; vpbroadcast where the source (operand 1) is itself a VI_256 vector and
;; element 0 is duplicated.  Alternative 0 takes the source from memory;
;; alternative 1 takes it from a register and prints it with the %x
;; operand modifier.
541e350d | 17088 | (define_insn "avx2_pbroadcast<mode>_1" |
c358a059 | 17089 | [(set (match_operand:VI_256 0 "register_operand" "=x,x") |
541e350d | 17090 | (vec_duplicate:VI_256 |
17091 | (vec_select:<ssescalarmode> | |
c358a059 | 17092 | (match_operand:VI_256 1 "nonimmediate_operand" "m,x") |
541e350d | 17093 | (parallel [(const_int 0)]))))] |
17094 | "TARGET_AVX2" | |
c358a059 | 17095 | "@ |
17096 | vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1} | |
17097 | vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" | |
541e350d | 17098 | [(set_attr "type" "ssemov") |
17099 | (set_attr "prefix_extra" "1") | |
17100 | (set_attr "prefix" "vex") | |
17101 | (set_attr "mode" "<sseinsnmode>")]) | |
17102 | ||
;; Variable permute (UNSPEC_VPERMVAR) for VI48F_256_512 modes.  Operand 1
;; is the data vector (register or memory) and operand 2 the integer
;; vector of mode <sseintvecmode>, which must be a register.  In AT&T
;; order the template prints operand 1 first, then operand 2.
fcb19554 | 17103 | (define_insn "<avx2_avx512>_permvar<mode><mask_name>" |
bf24193f | 17104 | [(set (match_operand:VI48F_256_512 0 "register_operand" "=v") |
17105 | (unspec:VI48F_256_512 | |
17106 | [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm") | |
17107 | (match_operand:<sseintvecmode> 2 "register_operand" "v")] | |
8da8a06b | 17108 | UNSPEC_VPERMVAR))] |
5220cab6 | 17109 | "TARGET_AVX2 && <mask_mode512bit_condition>" |
17110 | "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}" | |
5deb404d | 17111 | [(set_attr "type" "sselog") |
5220cab6 | 17112 | (set_attr "prefix" "<mask_prefix2>") |
bf24193f | 17113 | (set_attr "mode" "<sseinsnmode>")]) |
5deb404d | 17114 | |
;; Variable permute (UNSPEC_VPERMVAR) for the VI1_AVX512VL modes; requires
;; TARGET_AVX512VBMI.  Same operand layout and template as the other
;; permvar patterns in this file section.
afee0628 | 17115 | (define_insn "<avx512>_permvar<mode><mask_name>" |
17116 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17117 | (unspec:VI1_AVX512VL | |
17118 | [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm") | |
17119 | (match_operand:<sseintvecmode> 2 "register_operand" "v")] | |
17120 | UNSPEC_VPERMVAR))] | |
17121 | "TARGET_AVX512VBMI && <mask_mode512bit_condition>" | |
17122 | "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}" | |
17123 | [(set_attr "type" "sselog") | |
17124 | (set_attr "prefix" "<mask_prefix2>") | |
17125 | (set_attr "mode" "<sseinsnmode>")]) | |
17126 | ||
;; Variable permute (UNSPEC_VPERMVAR) for the VI2_AVX512VL modes; requires
;; TARGET_AVX512BW.  Same operand layout and template as the other
;; permvar patterns in this file section.
fcb19554 | 17127 | (define_insn "<avx512>_permvar<mode><mask_name>" |
17128 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17129 | (unspec:VI2_AVX512VL | |
17130 | [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm") | |
17131 | (match_operand:<sseintvecmode> 2 "register_operand" "v")] | |
17132 | UNSPEC_VPERMVAR))] | |
17133 | "TARGET_AVX512BW && <mask_mode512bit_condition>" | |
17134 | "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}" | |
17135 | [(set_attr "type" "sselog") | |
17136 | (set_attr "prefix" "<mask_prefix2>") | |
17137 | (set_attr "mode" "<sseinsnmode>")]) | |
17138 | ||
;; Expander for an immediate-controlled permute of VI8F_256_512 vectors.
;; The 8-bit immediate (operand 2) is split into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5, 6-7), which are passed as separate constants to
;; the matching _1 pattern.
262f9173 | 17139 | (define_expand "<avx2_avx512>_perm<mode>" |
feadfe94 | 17140 | [(match_operand:VI8F_256_512 0 "register_operand") |
17141 | (match_operand:VI8F_256_512 1 "nonimmediate_operand") | |
abd4f58b | 17142 | (match_operand:SI 2 "const_0_to_255_operand")] |
a9e4de7b | 17143 | "TARGET_AVX2" |
17144 | { | |
17145 | int mask = INTVAL (operands[2]); | |
262f9173 | 17146 | emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1], |
5220cab6 | 17147 | GEN_INT ((mask >> 0) & 3), |
17148 | GEN_INT ((mask >> 2) & 3), | |
17149 | GEN_INT ((mask >> 4) & 3), | |
17150 | GEN_INT ((mask >> 6) & 3))); | |
17151 | DONE; | |
17152 | }) | |
17153 | ||
;; Masked variant of the immediate permute expander; requires
;; TARGET_AVX512F.  The immediate is split into four 2-bit selectors as in
;; the unmasked expander; operand 3 (vector_move_operand) and operand 4
;; (mask register of mode <avx512fmaskmode>) are forwarded unchanged to
;; the _1_mask pattern.
262f9173 | 17154 | (define_expand "<avx512>_perm<mode>_mask" |
17155 | [(match_operand:VI8F_256_512 0 "register_operand") | |
17156 | (match_operand:VI8F_256_512 1 "nonimmediate_operand") | |
5220cab6 | 17157 | (match_operand:SI 2 "const_0_to_255_operand") |
262f9173 | 17158 | (match_operand:VI8F_256_512 3 "vector_move_operand") |
5220cab6 | 17159 | (match_operand:<avx512fmaskmode> 4 "register_operand")] |
17160 | "TARGET_AVX512F" | |
17161 | { | |
17162 | int mask = INTVAL (operands[2]); | |
262f9173 | 17163 | emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1], |
5220cab6 | 17164 | GEN_INT ((mask >> 0) & 3), |
17165 | GEN_INT ((mask >> 2) & 3), | |
17166 | GEN_INT ((mask >> 4) & 3), | |
17167 | GEN_INT ((mask >> 6) & 3), | |
17168 | operands[3], operands[4])); | |
a9e4de7b | 17169 | DONE; |
17170 | }) | |
17171 | ||
;; Insn for the immediate permute: a vec_select of operand 1 with four
;; element selectors (operands 2..5, each 0..3).  The output code packs
;; the four selectors back into one 8-bit immediate (2 bits each, operand 2
;; in the low bits), overwrites operands[2] with it and prints vperm with
;; that immediate.
262f9173 | 17172 | (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>" |
feadfe94 | 17173 | [(set (match_operand:VI8F_256_512 0 "register_operand" "=v") |
17174 | (vec_select:VI8F_256_512 | |
17175 | (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm") | |
abd4f58b | 17176 | (parallel [(match_operand 2 "const_0_to_3_operand") |
17177 | (match_operand 3 "const_0_to_3_operand") | |
17178 | (match_operand 4 "const_0_to_3_operand") | |
17179 | (match_operand 5 "const_0_to_3_operand")])))] | |
5220cab6 | 17180 | "TARGET_AVX2 && <mask_mode512bit_condition>" |
a9e4de7b | 17181 | { |
17182 | int mask = 0; | |
17183 | mask |= INTVAL (operands[2]) << 0; | |
17184 | mask |= INTVAL (operands[3]) << 2; | |
17185 | mask |= INTVAL (operands[4]) << 4; | |
17186 | mask |= INTVAL (operands[5]) << 6; | |
17187 | operands[2] = GEN_INT (mask); | |
5220cab6 | 17188 | return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
a9e4de7b | 17189 | } |
5deb404d | 17190 | [(set_attr "type" "sselog") |
5220cab6 | 17191 | (set_attr "prefix" "<mask_prefix2>") |
e0c9db9b | 17192 | (set_attr "mode" "<sseinsnmode>")]) |
5deb404d | 17193 | |
;; vperm2i128: UNSPEC_VPERMTI of two V4DI sources (operand 2 may be memory)
;; and an 8-bit immediate (operand 3).  Enabled by TARGET_AVX2.
17194 | (define_insn "avx2_permv2ti" | |
17195 | [(set (match_operand:V4DI 0 "register_operand" "=x") | |
17196 | (unspec:V4DI | |
17197 | [(match_operand:V4DI 1 "register_operand" "x") | |
a9e4de7b | 17198 | (match_operand:V4DI 2 "nonimmediate_operand" "xm") |
5deb404d | 17199 | (match_operand:SI 3 "const_0_to_255_operand" "n")] |
17200 | UNSPEC_VPERMTI))] | |
17201 | "TARGET_AVX2" | |
17202 | "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
17203 | [(set_attr "type" "sselog") | |
17204 | (set_attr "prefix" "vex") | |
17205 | (set_attr "mode" "OI")]) | |
17206 | ||
;; vbroadcastsd from a register: element 0 of the V2DF operand 1 is
;; duplicated into all elements of the V4DF destination.  The source must
;; be a register in this pattern.  Enabled by TARGET_AVX2.
17207 | (define_insn "avx2_vec_dupv4df" | |
17208 | [(set (match_operand:V4DF 0 "register_operand" "=x") | |
17209 | (vec_duplicate:V4DF | |
17210 | (vec_select:DF | |
17211 | (match_operand:V2DF 1 "register_operand" "x") | |
17212 | (parallel [(const_int 0)]))))] | |
17213 | "TARGET_AVX2" | |
17214 | "vbroadcastsd\t{%1, %0|%0, %1}" | |
17215 | [(set_attr "type" "sselog1") | |
17216 | (set_attr "prefix" "vex") | |
17217 | (set_attr "mode" "V4DF")]) | |
17218 | ||
;; vpbroadcast of element 0 of a full-width VI_AVX512BW source vector
;; (operand 1).  Alternative 0 takes the source from a register,
;; alternative 1 from memory; both print it with the %x modifier in AT&T
;; syntax.
;; The vec_select picks a single element, so its mode is the element mode
;; <ssescalarmode>, matching the other broadcast patterns in this file;
;; giving it the full vector mode would be malformed RTL.
05e7532b | 17219 | (define_insn "<avx512>_vec_dup<mode>_1" |
17220 | [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v") | |
17221 | (vec_duplicate:VI_AVX512BW | |
17222 | (vec_select:<ssescalarmode> | |
17223 | (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m") | |
17224 | (parallel [(const_int 0)]))))] | |
17225 | "TARGET_AVX512F" | |
9751df37 | 17226 | "@ |
17227 | vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1} | |
17228 | vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}" | |
05e7532b | 17229 | [(set_attr "type" "ssemov") |
17230 | (set_attr "prefix" "evex") | |
17231 | (set_attr "mode" "<sseinsnmode>")]) | |
17232 | ||
;; <avx512>_vec_dup<mode><mask_name> for V48_AVX512VL modes: duplicate
;; element 0 of the <ssexmmmode> register-or-memory operand 1 into every
;; element of operand 0.  Conditioned on TARGET_AVX512F; emits
;; v<sseintprefix>broadcast<bcstscalarsuff> with <mask_operand2> appended
;; to the destination.
f50aa6e9 | 17233 | (define_insn "<avx512>_vec_dup<mode><mask_name>" |
17234 | [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") | |
17235 | (vec_duplicate:V48_AVX512VL | |
8e6b975f | 17236 | (vec_select:<ssescalarmode> |
17237 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm") | |
17238 | (parallel [(const_int 0)]))))] | |
17239 | "TARGET_AVX512F" | |
5220cab6 | 17240 | "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
8e6b975f | 17241 | [(set_attr "type" "ssemov") |
17242 | (set_attr "prefix" "evex") | |
17243 | (set_attr "mode" "<sseinsnmode>")]) | |
17244 | ||
;; <avx512>_vec_dup<mode><mask_name> for VI12_AVX512VL modes: same RTL
;; shape as the V48_AVX512VL pattern of the same name, but conditioned on
;; TARGET_AVX512BW and always emitting vpbroadcast<bcstscalarsuff>.
f50aa6e9 | 17245 | (define_insn "<avx512>_vec_dup<mode><mask_name>" |
17246 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
17247 | (vec_duplicate:VI12_AVX512VL | |
17248 | (vec_select:<ssescalarmode> | |
17249 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm") | |
17250 | (parallel [(const_int 0)]))))] | |
17251 | "TARGET_AVX512BW" | |
17252 | "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
17253 | [(set_attr "type" "ssemov") | |
17254 | (set_attr "prefix" "evex") | |
17255 | (set_attr "mode" "<sseinsnmode>")]) | |
17256 | ||
;; avx512f_broadcast<mode><mask_name> for V16FI modes: vec_duplicate of an
;; <ssexmmmode> operand.  Conditioned on TARGET_AVX512F.
;;   alt 0 (register source): vshuf<shuffletype>32x4 with immediate 0x0,
;;     using %g1 for both source operands.
;;   alt 1 (memory source): vbroadcast<shuffletype>32x4.
5220cab6 | 17257 | (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>" |
8e9989b0 | 17258 | [(set (match_operand:V16FI 0 "register_operand" "=v,v") |
17259 | (vec_duplicate:V16FI | |
17260 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))] | |
17261 | "TARGET_AVX512F" | |
17262 | "@ | |
5220cab6 | 17263 | vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0} |
17264 | vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
8e9989b0 | 17265 | [(set_attr "type" "ssemov") |
17266 | (set_attr "prefix" "evex") | |
17267 | (set_attr "mode" "<sseinsnmode>")]) | |
17268 | ||
;; avx512f_broadcast<mode><mask_name> for V8FI modes: vec_duplicate of an
;; <ssehalfvecmode> operand.  Conditioned on TARGET_AVX512F.
;;   alt 0 (register source): vshuf<shuffletype>64x2 with immediate 0x44,
;;     using %g1 for both source operands.
;;   alt 1 (memory source): vbroadcast<shuffletype>64x4.
5220cab6 | 17269 | (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>" |
8e9989b0 | 17270 | [(set (match_operand:V8FI 0 "register_operand" "=v,v") |
17271 | (vec_duplicate:V8FI | |
17272 | (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))] | |
17273 | "TARGET_AVX512F" | |
17274 | "@ | |
5220cab6 | 17275 | vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44} |
17276 | vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
8e9989b0 | 17277 | [(set_attr "type" "ssemov") |
17278 | (set_attr "prefix" "evex") | |
17279 | (set_attr "mode" "<sseinsnmode>")]) | |
17280 | ||
;; <avx512>_vec_dup_gpr<mode><mask_name> for VI12_AVX512VL modes:
;; vec_duplicate of a scalar <ssescalarmode> operand.  Conditioned on
;; TARGET_AVX512BW.
;;   alt 0 ("vm" source): vpbroadcast<bcstscalarsuff> with %1.
;;   alt 1 ("r" source): same mnemonic, source printed with %k1.
f50aa6e9 | 17281 | (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>" |
dc958a45 | 17282 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") |
f50aa6e9 | 17283 | (vec_duplicate:VI12_AVX512VL |
dc958a45 | 17284 | (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))] |
f50aa6e9 | 17285 | "TARGET_AVX512BW" |
dc958a45 | 17286 | "@ |
17287 | vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1} | |
17288 | vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}" | |
f50aa6e9 | 17289 | [(set_attr "type" "ssemov") |
17290 | (set_attr "prefix" "evex") | |
17291 | (set_attr "mode" "<sseinsnmode>")]) | |
17292 | ||
;; <avx512>_vec_dup_gpr<mode><mask_name> for V48_AVX512VL modes:
;; vec_duplicate of a scalar <ssescalarmode> operand, one template for
;; both alternatives.  Conditioned on TARGET_AVX512F.
;; The "enabled" attribute restricts alternative 1 (the "r" source) to
;; scalar modes of class MODE_INT, and for DImode additionally requires
;; TARGET_64BIT; alternative 0 is always enabled.
17293 | (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>" | |
dc958a45 | 17294 | [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v") |
f50aa6e9 | 17295 | (vec_duplicate:V48_AVX512VL |
dc958a45 | 17296 | (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))] |
8e6b975f | 17297 | "TARGET_AVX512F" |
5220cab6 | 17298 | "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
8e6b975f | 17299 | [(set_attr "type" "ssemov") |
17300 | (set_attr "prefix" "evex") | |
dc958a45 | 17301 | (set_attr "mode" "<sseinsnmode>") |
17302 | (set (attr "enabled") | |
17303 | (if_then_else (eq_attr "alternative" "1") | |
17304 | (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT | |
17305 | && (<ssescalarmode>mode != DImode || TARGET_64BIT)") | |
17306 | (const_int 1)))]) | |
8e6b975f | 17307 | |
;; vec_dupv4sf: broadcast an SF scalar into V4SF.  Conditioned on
;; TARGET_SSE; the "isa" attribute selects per alternative:
;;   alt 0 (avx,   register source): vshufps $0 with operand 1 twice.
;;   alt 1 (avx,   memory source):   vbroadcastss.
;;   alt 2 (noavx, source tied to operand 0): in-place shufps $0.
dc958a45 | 17308 | (define_insn "vec_dupv4sf" |
17309 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") | |
17310 | (vec_duplicate:V4SF | |
17311 | (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] | |
17312 | "TARGET_SSE" | |
17313 | "@ | |
17314 | vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} | |
17315 | vbroadcastss\t{%1, %0|%0, %1} | |
17316 | shufps\t{$0, %0, %0|%0, %0, 0}" | |
17317 | [(set_attr "isa" "avx,avx,noavx") | |
17318 | (set_attr "type" "sseshuf1,ssemov,sseshuf1") | |
17319 | (set_attr "length_immediate" "1,0,1") | |
17320 | (set_attr "prefix_extra" "0,1,*") | |
17321 | (set_attr "prefix" "vex,vex,orig") | |
17322 | (set_attr "mode" "V4SF")]) | |
17323 | ||
;; *vec_dupv4si: broadcast an SI scalar into V4SI.  Conditioned on
;; TARGET_SSE; the "isa" attribute selects per alternative:
;;   alt 0 (sse2,  register source): %vpshufd $0.
;;   alt 1 (avx,   memory source):   vbroadcastss.
;;   alt 2 (noavx, source tied to operand 0): in-place shufps $0.
17324 | (define_insn "*vec_dupv4si" | |
17325 | [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") | |
17326 | (vec_duplicate:V4SI | |
17327 | (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] | |
17328 | "TARGET_SSE" | |
17329 | "@ | |
17330 | %vpshufd\t{$0, %1, %0|%0, %1, 0} | |
17331 | vbroadcastss\t{%1, %0|%0, %1} | |
17332 | shufps\t{$0, %0, %0|%0, %0, 0}" | |
17333 | [(set_attr "isa" "sse2,avx,noavx") | |
17334 | (set_attr "type" "sselog1,ssemov,sselog1") | |
17335 | (set_attr "length_immediate" "1,0,1") | |
17336 | (set_attr "prefix_extra" "0,1,*") | |
17337 | (set_attr "prefix" "maybe_vex,vex,orig") | |
17338 | (set_attr "mode" "TI,V4SF,V4SF")]) | |
17339 | ||
;; *vec_dupv2di: broadcast a DI scalar into V2DI.  Conditioned on
;; TARGET_SSE; the "isa" attribute selects per alternative:
;;   alt 0 (sse2_noavx, source tied to operand 0): in-place punpcklqdq.
;;   alt 1 (avx,        register source):          vpunpcklqdq with %d1.
;;   alt 2 (sse3,       memory source):            %vmovddup.
;;   alt 3 (noavx,      source tied to operand 0): in-place movlhps.
17340 | (define_insn "*vec_dupv2di" | |
17341 | [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") | |
17342 | (vec_duplicate:V2DI | |
17343 | (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))] | |
17344 | "TARGET_SSE" | |
17345 | "@ | |
17346 | punpcklqdq\t%0, %0 | |
17347 | vpunpcklqdq\t{%d1, %0|%0, %d1} | |
17348 | %vmovddup\t{%1, %0|%0, %1} | |
17349 | movlhps\t%0, %0" | |
17350 | [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") | |
17351 | (set_attr "type" "sselog1,sselog1,sselog1,ssemov") | |
17352 | (set_attr "prefix" "orig,vex,maybe_vex,orig") | |
17353 | (set_attr "mode" "TI,TI,DF,V4SF")]) | |
f50aa6e9 | 17354 | |
;; avx2_vbroadcasti128_<mode>: VI_256 result formed by vec_concat of an
;; <ssehalfvecmode> memory operand with itself (match_dup 1).
;; Conditioned on TARGET_AVX2; emits vbroadcasti128.
5deb404d | 17355 | (define_insn "avx2_vbroadcasti128_<mode>" |
17356 | [(set (match_operand:VI_256 0 "register_operand" "=x") | |
17357 | (vec_concat:VI_256 | |
17358 | (match_operand:<ssehalfvecmode> 1 "memory_operand" "m") | |
17359 | (match_dup 1)))] | |
17360 | "TARGET_AVX2" | |
17361 | "vbroadcasti128\t{%1, %0|%0, %1}" | |
17362 | [(set_attr "type" "ssemov") | |
17363 | (set_attr "prefix_extra" "1") | |
17364 | (set_attr "prefix" "vex") | |
17365 | (set_attr "mode" "OI")]) | |
17366 | ||
;; AVX_VEC_DUP_MODE is used below by "vec_dup<mode>" and by the
;; TARGET_AVX && !TARGET_AVX2 splitter; AVX2_VEC_DUP_MODE is used by
;; "*vec_dup<mode>" and by the general-register splitter.
6e154e02 | 17367 | ;; Modes handled by AVX vec_dup patterns. |
17368 | (define_mode_iterator AVX_VEC_DUP_MODE | |
17369 | [V8SI V8SF V4DI V4DF]) | |
17370 | ;; Modes handled by AVX2 vec_dup patterns. | |
17371 | (define_mode_iterator AVX2_VEC_DUP_MODE | |
17372 | [V32QI V16QI V16HI V8HI V8SI V4SI]) | |
17373 | ||
;; *vec_dup<mode> for AVX2_VEC_DUP_MODE: vec_duplicate of a scalar
;; <ssescalarmode> operand.  Conditioned on TARGET_AVX2.
;;   alt 0 (memory source):   v<sseintprefix>broadcast<bcstscalarsuff>.
;;   alt 1 ("x" register):    same mnemonic, source printed with %x1.
;;   alt 2 ("$r" source, "Yi" destination): template is "#", i.e. no
;;     assembly is emitted directly; the define_split for
;;     AVX2_VEC_DUP_MODE with a GENERAL_REG_P source matches this shape.
17374 | (define_insn "*vec_dup<mode>" | |
5d896697 | 17375 | [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi") |
6e154e02 | 17376 | (vec_duplicate:AVX2_VEC_DUP_MODE |
ed6272f7 | 17377 | (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))] |
6e154e02 | 17378 | "TARGET_AVX2" |
17379 | "@ | |
17380 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1} | |
17381 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1} | |
17382 | #" | |
17383 | [(set_attr "type" "ssemov") | |
17384 | (set_attr "prefix_extra" "1") | |
17385 | (set_attr "prefix" "maybe_evex") | |
17386 | (set_attr "mode" "<sseinsnmode>")]) | |
17387 | ||
;; vec_dup<mode> for AVX_VEC_DUP_MODE: vec_duplicate of a scalar
;; <ssescalarmode> operand.  Conditioned on TARGET_AVX; the "isa"
;; attribute selects per alternative:
;;   alt 0 (avx2,    memory):   v<sseintprefix>broadcast<bcstscalarsuff>.
;;   alt 1 (noavx2,  memory):   vbroadcast<ssescalarmodesuffix>.
;;   alt 2 (avx2,    "x" reg):  broadcast with source printed as %x1.
;;   alt 3 (avx512f, "v" reg):  broadcast with source %x1 and destination
;;     printed as %g0.
;;   alt 4 (noavx2,  "?x" reg): template "#"; the TARGET_AVX && !TARGET_AVX2
;;     define_split for AVX_VEC_DUP_MODE matches this shape.
17388 | (define_insn "vec_dup<mode>" | |
f40f9ee2 | 17389 | [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x") |
6e154e02 | 17390 | (vec_duplicate:AVX_VEC_DUP_MODE |
f40f9ee2 | 17391 | (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))] |
6e154e02 | 17392 | "TARGET_AVX" |
17393 | "@ | |
17394 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1} | |
17395 | vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1} | |
17396 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1} | |
f40f9ee2 | 17397 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1} |
6e154e02 | 17398 | #" |
17399 | [(set_attr "type" "ssemov") | |
17400 | (set_attr "prefix_extra" "1") | |
17401 | (set_attr "prefix" "maybe_evex") | |
f40f9ee2 | 17402 | (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2") |
17403 | (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")]) | |
6e154e02 | 17404 | |
;; Post-reload split of an AVX2_VEC_DUP_MODE broadcast whose scalar source
;; is in a general register (GENERAL_REG_P).  It emits two insns:
;;   1. gen_vec_setv4si_0 on the V4SI low part of operand 0, with
;;      CONST0_RTX (V4SImode) and the SImode low part of operand 1;
;;   2. gen_avx2_pbroadcast<mode> from the <ssexmmmode> low part of
;;      operand 0 into operand 0.
17405 | (define_split | |
17406 | [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand") | |
17407 | (vec_duplicate:AVX2_VEC_DUP_MODE | |
17408 | (match_operand:<ssescalarmode> 1 "register_operand")))] | |
dc958a45 | 17409 | "TARGET_AVX2 |
17410 | /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is | |
17411 | available, because then we can broadcast from GPRs directly. | |
17412 | For V*[QH]I modes it requires both -mavx512vl and -mavx512bw, | |
17413 | for V*SI mode it requires just -mavx512vl. */ | |
17414 | && !(TARGET_AVX512VL | |
17415 | && (TARGET_AVX512BW || <ssescalarmode>mode == SImode)) | |
17416 | && reload_completed && GENERAL_REG_P (operands[1])" | |
6e154e02 | 17417 | [(const_int 0)] |
17418 | { | |
17419 | emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]), | |
17420 | CONST0_RTX (V4SImode), | |
17421 | gen_lowpart (SImode, operands[1]))); | |
17422 | emit_insn (gen_avx2_pbroadcast<mode> (operands[0], | |
17423 | gen_lowpart (<ssexmmmode>mode, | |
17424 | operands[0]))); | |
17425 | DONE; | |
17426 | }) | |
17427 | ||
;; Post-reload split of an AVX_VEC_DUP_MODE broadcast from a register when
;; TARGET_AVX is on but TARGET_AVX2 is not.  Operand 2 is created as the
;; <ssehalfvecmode> register with the same REGNO as operand 0; the scalar
;; is first duplicated into that half, then the half is vec_concat'ed
;; with itself to fill operand 0.
b6fc7168 | 17428 | (define_split |
abd4f58b | 17429 | [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand") |
a17124a0 | 17430 | (vec_duplicate:AVX_VEC_DUP_MODE |
abd4f58b | 17431 | (match_operand:<ssescalarmode> 1 "register_operand")))] |
541e350d | 17432 | "TARGET_AVX && !TARGET_AVX2 && reload_completed" |
a17124a0 | 17433 | [(set (match_dup 2) |
17434 | (vec_duplicate:<ssehalfvecmode> (match_dup 1))) | |
17435 | (set (match_dup 0) | |
17436 | (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))] | |
63d5e521 | 17437 | "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));") |
b6fc7168 | 17438 | |
;; avx_vbroadcastf128_<mode>: V_256 result formed by vec_concat of an
;; <ssehalfvecmode> operand with itself.  Conditioned on TARGET_AVX.
;;   alt 0 (memory source):            vbroadcast<i128>.
;;   alt 1 (source tied to operand 0): vinsert<i128> $1 into operand 0.
;;   alt 2 ("?x" register source):     vperm2<i128> $0 with %t1 twice.
04e14b44 | 17439 | (define_insn "avx_vbroadcastf128_<mode>" |
6fe5844b | 17440 | [(set (match_operand:V_256 0 "register_operand" "=x,x,x") |
17441 | (vec_concat:V_256 | |
63d5e521 | 17442 | (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x") |
04e14b44 | 17443 | (match_dup 1)))] |
17444 | "TARGET_AVX" | |
17445 | "@ | |
154d1782 | 17446 | vbroadcast<i128>\t{%1, %0|%0, %1} |
17447 | vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1} | |
17448 | vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}" | |
04e14b44 | 17449 | [(set_attr "type" "ssemov,sselog1,sselog1") |
17450 | (set_attr "prefix_extra" "1") | |
17451 | (set_attr "length_immediate" "0,1,1") | |
17452 | (set_attr "prefix" "vex") | |
154d1782 | 17453 | (set_attr "mode" "<sseinsnmode>")]) |
04e14b44 | 17454 | |
;; The 256-bit and 128-bit entries of VI4F_BRCST32x2 are only enabled
;; under TARGET_AVX512VL.
0aac18e3 | 17455 | ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si. |
17456 | (define_mode_iterator VI4F_BRCST32x2 | |
17457 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") | |
17458 | V16SF (V8SF "TARGET_AVX512VL")]) | |
17459 | ||
;; Maps each 64-bit-element vector mode to the two-element vector mode
;; with the same element type.
17460 | (define_mode_attr 64x2mode | |
17461 | [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")]) | |
17462 | ||
;; Maps each VI4F_BRCST32x2 mode to the two-element vector mode with the
;; same 32-bit element type.
17463 | (define_mode_attr 32x2mode | |
17464 | [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI") | |
17465 | (V8SF "V2SF") (V4SI "V2SI")]) | |
17466 | ||
;; avx512dq_broadcast<mode><mask_name> for VI4F_BRCST32x2 modes:
;; duplicate the <32x2mode> pair made of elements 0 and 1 of the
;; <ssexmmmode> register-or-memory operand 1.  Conditioned on
;; TARGET_AVX512DQ; emits vbroadcast<shuffletype>32x2.
17467 | (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>" | |
17468 | [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v") | |
17469 | (vec_duplicate:VI4F_BRCST32x2 | |
17470 | (vec_select:<32x2mode> | |
17471 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm") | |
17472 | (parallel [(const_int 0) (const_int 1)]))))] | |
17473 | "TARGET_AVX512DQ" | |
17474 | "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
17475 | [(set_attr "type" "ssemov") | |
17476 | (set_attr "prefix_extra" "1") | |
17477 | (set_attr "prefix" "evex") | |
17478 | (set_attr "mode" "<sseinsnmode>")]) | |
17479 | ||
;; avx512vl_broadcast<mode><mask_name>_1 for VI4F_256 modes: vec_duplicate
;; of an <ssexmmmode> operand.  Conditioned on TARGET_AVX512VL.
;;   alt 0 (register source): vshuf<shuffletype>32x4 with immediate 0x0,
;;     using %t1 for both source operands.
;;   alt 1 (memory source): vbroadcast<shuffletype>32x4.
17480 | (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1" | |
17481 | [(set (match_operand:VI4F_256 0 "register_operand" "=v,v") | |
17482 | (vec_duplicate:VI4F_256 | |
17483 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))] | |
17484 | "TARGET_AVX512VL" | |
17485 | "@ | |
17486 | vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0} | |
17487 | vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
17488 | [(set_attr "type" "ssemov") | |
17489 | (set_attr "prefix_extra" "1") | |
17490 | (set_attr "prefix" "evex") | |
17491 | (set_attr "mode" "<sseinsnmode>")]) | |
17492 | ||
;; avx512dq_broadcast<mode><mask_name>_1 for V16FI modes: vec_duplicate of
;; an <ssehalfvecmode> operand.  Conditioned on TARGET_AVX512DQ.
;;   alt 0 (register source): vshuf<shuffletype>32x4 with immediate 0x44,
;;     using %g1 for both source operands.
;;   alt 1 (memory source): vbroadcast<shuffletype>32x8.
17493 | (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1" | |
17494 | [(set (match_operand:V16FI 0 "register_operand" "=v,v") | |
17495 | (vec_duplicate:V16FI | |
17496 | (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))] | |
17497 | "TARGET_AVX512DQ" | |
17498 | "@ | |
17499 | vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44} | |
17500 | vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
17501 | [(set_attr "type" "ssemov") | |
17502 | (set_attr "prefix_extra" "1") | |
17503 | (set_attr "prefix" "evex") | |
17504 | (set_attr "mode" "<sseinsnmode>")]) | |
17505 | ||
;; The 256-bit entries (V4DI, V4DF) are only enabled under TARGET_AVX512VL.
17506 | ;; For broadcast[i|f]64x2 | |
17507 | (define_mode_iterator VI8F_BRCST64x2 | |
17508 | [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) | |
17509 | ||
;; avx512dq_broadcast<mode><mask_name>_1 for VI8F_BRCST64x2 modes:
;; vec_duplicate of a <64x2mode> operand.  Conditioned on TARGET_AVX512DQ.
;;   alt 0 (register source): vshuf<shuffletype>64x2 with immediate 0x0,
;;     source printed through the %<concat_tg_mode> modifier.
;;   alt 1 (memory source): vbroadcast<shuffletype>64x2.
17510 | (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1" | |
17511 | [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v") | |
17512 | (vec_duplicate:VI8F_BRCST64x2 | |
17513 | (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))] | |
17514 | "TARGET_AVX512DQ" | |
17515 | "@ | |
b6840105 | 17516 | vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0} |
0aac18e3 | 17517 | vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
17518 | [(set_attr "type" "ssemov") | |
17519 | (set_attr "prefix_extra" "1") | |
17520 | (set_attr "prefix" "evex") | |
17521 | (set_attr "mode" "<sseinsnmode>")]) | |
17522 | ||
;; avx512cd_maskb_vec_dup<mode>: duplicate the zero-extended (QI -> DI)
;; value of a "Yk" register operand into every element of a VI8_AVX512VL
;; destination.  Conditioned on TARGET_AVX512CD; emits vpbroadcastmb2q.
1ffb4a9e | 17523 | (define_insn "avx512cd_maskb_vec_dup<mode>" |
17524 | [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") | |
17525 | (vec_duplicate:VI8_AVX512VL | |
697a43f8 | 17526 | (zero_extend:DI |
a31e7f46 | 17527 | (match_operand:QI 1 "register_operand" "Yk"))))] |
697a43f8 | 17528 | "TARGET_AVX512CD" |
17529 | "vpbroadcastmb2q\t{%1, %0|%0, %1}" | |
17530 | [(set_attr "type" "mskmov") | |
17531 | (set_attr "prefix" "evex") | |
17532 | (set_attr "mode" "XI")]) | |
17533 | ||
;; avx512cd_maskw_vec_dup<mode>: duplicate the zero-extended (HI -> SI)
;; value of a "Yk" register operand into every element of a VI4_AVX512VL
;; destination.  Conditioned on TARGET_AVX512CD; emits vpbroadcastmw2d.
7da26bee | 17534 | (define_insn "avx512cd_maskw_vec_dup<mode>" |
17535 | [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") | |
17536 | (vec_duplicate:VI4_AVX512VL | |
697a43f8 | 17537 | (zero_extend:SI |
a31e7f46 | 17538 | (match_operand:HI 1 "register_operand" "Yk"))))] |
697a43f8 | 17539 | "TARGET_AVX512CD" |
17540 | "vpbroadcastmw2d\t{%1, %0|%0, %1}" | |
17541 | [(set_attr "type" "mskmov") | |
17542 | (set_attr "prefix" "evex") | |
17543 | (set_attr "mode" "XI")]) | |
17544 | ||
04e14b44 | 17545 | ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm. |
17546 | ;; If it so happens that the input is in memory, use vbroadcast. | |
17547 | ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128). | |
;; V4SF variant.  Operand 2 is the selection PARALLEL accepted by
;; avx_vbroadcast_operand and operand 3 is its first element, the index
;; of the element being broadcast.  Alternatives 0 and 1 take a memory
;; source, alternative 2 a register source.
17548 | (define_insn "*avx_vperm_broadcast_v4sf" | |
17549 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") | |
17550 | (vec_select:V4SF | |
17551 | (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x") | |
17552 | (match_parallel 2 "avx_vbroadcast_operand" | |
17553 | [(match_operand 3 "const_int_operand" "C,n,n")])))] | |
17554 | "TARGET_AVX" | |
17555 | { | |
17556 | int elt = INTVAL (operands[3]); | |
17557 | switch (which_alternative) | |
17558 | { | |
17559 | case 0: | |
17560 | case 1: | |
/* Re-address operand 1 as the SFmode element at byte offset elt * 4.  */
17561 | operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4); | |
c358a059 | 17562 | return "vbroadcastss\t{%1, %0|%0, %k1}"; |
04e14b44 | 17563 | case 2: |
/* 0x55 is binary 01010101, so elt * 0x55 copies elt into each of the
   four 2-bit fields of the immediate.  */
17564 | operands[2] = GEN_INT (elt * 0x55); | |
17565 | return "vpermilps\t{%2, %1, %0|%0, %1, %2}"; | |
17566 | default: | |
17567 | gcc_unreachable (); | |
17568 | } | |
17569 | } | |
17570 | [(set_attr "type" "ssemov,ssemov,sselog1") | |
17571 | (set_attr "prefix_extra" "1") | |
17572 | (set_attr "length_immediate" "0,0,1") | |
17573 | (set_attr "prefix" "vex") | |
17574 | (set_attr "mode" "SF,SF,V4SF")]) | |
17575 | ||
;; VF_256 variant of the broadcast-as-vec_select pattern.  The insn
;; template is "#"; the split runs after reload, except for V4DFmode when
;; TARGET_AVX2 is enabled (see the split condition).
;;   Register source: with TARGET_AVX2 and element 0, emit vec_dup<mode> of
;;     the scalar low part; otherwise emit vpermil followed by vperm2f128.
;;   Memory source: operand 1 is re-addressed to the selected scalar
;;     element and the replacement vec_duplicate pattern is used.
17576 | (define_insn_and_split "*avx_vperm_broadcast_<mode>" | |
6fe5844b | 17577 | [(set (match_operand:VF_256 0 "register_operand" "=x,x,x") |
17578 | (vec_select:VF_256 | |
17579 | (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x") | |
04e14b44 | 17580 | (match_parallel 2 "avx_vbroadcast_operand" |
17581 | [(match_operand 3 "const_int_operand" "C,n,n")])))] | |
17582 | "TARGET_AVX" | |
17583 | "#" | |
541e350d | 17584 | "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)" |
6fe5844b | 17585 | [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))] |
04e14b44 | 17586 | { |
17587 | rtx op0 = operands[0], op1 = operands[1]; | |
17588 | int elt = INTVAL (operands[3]); | |
17589 | ||
17590 | if (REG_P (op1)) | |
17591 | { | |
17592 | int mask; | |
17593 | ||
541e350d | 17594 | if (TARGET_AVX2 && elt == 0) |
17595 | { | |
17596 | emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode, | |
17597 | op1))); | |
17598 | DONE; | |
17599 | } | |
17600 | ||
04e14b44 | 17601 | /* Shuffle element we care about into all elements of the 128-bit lane. |
17602 | The other lane gets shuffled too, but we don't care. */ | |
17603 | if (<MODE>mode == V4DFmode) | |
17604 | mask = (elt & 1 ? 15 : 0); | |
17605 | else | |
17606 | mask = (elt & 3) * 0x55; | |
17607 | emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask))); | |
17608 | ||
17609 | /* Shuffle the lane we care about into both lanes of the dest. */ | |
17610 | mask = (elt / (<ssescalarnum> / 2)) * 0x11; | |
17611 | emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask))); | |
17612 | DONE; | |
17613 | } | |
17614 | ||
823a2ddd | 17615 | operands[1] = adjust_address (op1, <ssescalarmode>mode, |
17616 | elt * GET_MODE_SIZE (<ssescalarmode>mode)); | |
04e14b44 | 17617 | }) |
17618 | ||
;; vpermil expander for VF2 modes.  Operand 2 arrives as a
;; const_0_to_255_operand immediate and is rewritten into a PARALLEL of
;; <ssescalarnum> element indices.  Elements are handled in pairs
;; (i, i + 1): bit i of the mask picks the source for element i and
;; bit i + 1 the source for element i + 1, each offset by the pair base i.
5220cab6 | 17619 | (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>" |
abd4f58b | 17620 | [(set (match_operand:VF2 0 "register_operand") |
6fe5844b | 17621 | (vec_select:VF2 |
abd4f58b | 17622 | (match_operand:VF2 1 "nonimmediate_operand") |
17623 | (match_operand:SI 2 "const_0_to_255_operand")))] | |
5220cab6 | 17624 | "TARGET_AVX && <mask_mode512bit_condition>" |
af6d2927 | 17625 | { |
17626 | int mask = INTVAL (operands[2]); | |
17627 | rtx perm[<ssescalarnum>]; | |
17628 | ||
6615b722 | 17629 | int i; |
17630 | for (i = 0; i < <ssescalarnum>; i = i + 2) | |
af6d2927 | 17631 | { |
6615b722 | 17632 | perm[i] = GEN_INT (((mask >> i) & 1) + i); |
17633 | perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i); | |
af6d2927 | 17634 | } |
17635 | ||
17636 | operands[2] | |
17637 | = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); | |
17638 | }) | |
17639 | ||
;; vpermil expander for VF1 modes.  Operand 2 arrives as a
;; const_0_to_255_operand immediate and is rewritten into a PARALLEL of
;; <ssescalarnum> element indices.  Elements are handled in groups of
;; four: the 2-bit mask fields at bits 0-1, 2-3, 4-5 and 6-7 pick the
;; source within the group, and the same four fields are reused for every
;; group, offset by the group base i.
5220cab6 | 17640 | (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>" |
abd4f58b | 17641 | [(set (match_operand:VF1 0 "register_operand") |
6fe5844b | 17642 | (vec_select:VF1 |
abd4f58b | 17643 | (match_operand:VF1 1 "nonimmediate_operand") |
17644 | (match_operand:SI 2 "const_0_to_255_operand")))] | |
5220cab6 | 17645 | "TARGET_AVX && <mask_mode512bit_condition>" |
af6d2927 | 17646 | { |
17647 | int mask = INTVAL (operands[2]); | |
17648 | rtx perm[<ssescalarnum>]; | |
17649 | ||
03ae25dc | 17650 | int i; |
17651 | for (i = 0; i < <ssescalarnum>; i = i + 4) | |
af6d2927 | 17652 | { |
03ae25dc | 17653 | perm[i] = GEN_INT (((mask >> 0) & 3) + i); |
17654 | perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i); | |
17655 | perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i); | |
17656 | perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i); | |
af6d2927 | 17657 | } |
17658 | ||
17659 | operands[2] | |
17660 | = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); | |
17661 | }) | |
17662 | ||
;; *vpermilp<mode>: matches a vec_select over VF modes whose selection
;; PARALLEL (operand 2) is accepted by avx_vpermilp_parallel.  That helper
;; returns a nonzero value for a valid selection; the output code
;; subtracts 1 from it to obtain the immediate, stores it back into
;; operand 2 and emits vpermil<ssemodesuffix>.
5220cab6 | 17663 | (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>" |
e13e1b39 | 17664 | [(set (match_operand:VF 0 "register_operand" "=v") |
6fe5844b | 17665 | (vec_select:VF |
e13e1b39 | 17666 | (match_operand:VF 1 "nonimmediate_operand" "vm") |
4581fd42 | 17667 | (match_parallel 2 "" |
abd4f58b | 17668 | [(match_operand 3 "const_int_operand")])))] |
5220cab6 | 17669 | "TARGET_AVX && <mask_mode512bit_condition> |
4581fd42 | 17670 | && avx_vpermilp_parallel (operands[2], <MODE>mode)" |
af6d2927 | 17671 | { |
17672 | int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1; | |
17673 | operands[2] = GEN_INT (mask); | |
5220cab6 | 17674 | return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"; |
af6d2927 | 17675 | } |
ed30e0a6 | 17676 | [(set_attr "type" "sselog") |
00a0e418 | 17677 | (set_attr "prefix_extra" "1") |
17678 | (set_attr "length_immediate" "1") | |
5220cab6 | 17679 | (set_attr "prefix" "<mask_prefix>") |
6a3f5f59 | 17680 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 17681 | |
;; vpermilvar<mode>3: variable-control form, modelled as UNSPEC_VPERMIL
;; over a VF register (operand 1) and an <sseintvecmode>
;; register-or-memory control vector (operand 2).  Conditioned on
;; TARGET_AVX && <mask_mode512bit_condition>; emits vpermil<ssemodesuffix>.
5220cab6 | 17682 | (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>" |
e13e1b39 | 17683 | [(set (match_operand:VF 0 "register_operand" "=v") |
6fe5844b | 17684 | (unspec:VF |
e13e1b39 | 17685 | [(match_operand:VF 1 "register_operand" "v") |
17686 | (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")] | |
ed30e0a6 | 17687 | UNSPEC_VPERMIL))] |
5220cab6 | 17688 | "TARGET_AVX && <mask_mode512bit_condition>" |
17689 | "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
ed30e0a6 | 17690 | [(set_attr "type" "sselog") |
00a0e418 | 17691 | (set_attr "prefix_extra" "1") |
6470d004 | 17692 | (set_attr "btver2_decode" "vector") |
5220cab6 | 17693 | (set_attr "prefix" "<mask_prefix>") |
6a3f5f59 | 17694 | (set_attr "mode" "<sseinsnmode>")]) |
17695 | ||
;; Zero-masking vpermi2var expander for VI48F modes (TARGET_AVX512F).
;; Forwards operands 0-3 to the _maskz_1 generator, inserting
;; CONST0_RTX (<MODE>mode) before the mask register (operand 4).
6500d329 | 17696 | (define_expand "<avx512>_vpermi2var<mode>3_maskz" |
17697 | [(match_operand:VI48F 0 "register_operand" "=v") | |
17698 | (match_operand:VI48F 1 "register_operand" "v") | |
9a5ea1d5 | 17699 | (match_operand:<sseintvecmode> 2 "register_operand" "0") |
6500d329 | 17700 | (match_operand:VI48F 3 "nonimmediate_operand" "vm") |
a31e7f46 | 17701 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] |
9a5ea1d5 | 17702 | "TARGET_AVX512F" |
17703 | { | |
6500d329 | 17704 | emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( |
9a5ea1d5 | 17705 | operands[0], operands[1], operands[2], operands[3], |
17706 | CONST0_RTX (<MODE>mode), operands[4])); | |
17707 | DONE; | |
17708 | }) | |
17709 | ||
;; Zero-masking vpermi2var expander for VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  Forwards operands 0-3 to the _maskz_1 generator,
;; inserting CONST0_RTX (<MODE>mode) before the mask register (operand 4).
afee0628 | 17710 | (define_expand "<avx512>_vpermi2var<mode>3_maskz" |
17711 | [(match_operand:VI1_AVX512VL 0 "register_operand") | |
17712 | (match_operand:VI1_AVX512VL 1 "register_operand") | |
17713 | (match_operand:<sseintvecmode> 2 "register_operand") | |
17714 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") | |
17715 | (match_operand:<avx512fmaskmode> 4 "register_operand")] | |
17716 | "TARGET_AVX512VBMI" | |
17717 | { | |
17718 | emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( | |
17719 | operands[0], operands[1], operands[2], operands[3], | |
17720 | CONST0_RTX (<MODE>mode), operands[4])); | |
17721 | DONE; | |
17722 | }) | |
17723 | ||
;; Zero-masking vpermi2var expander for VI2_AVX512VL modes
;; (TARGET_AVX512BW).  Forwards operands 0-3 to the _maskz_1 generator,
;; inserting CONST0_RTX (<MODE>mode) before the mask register (operand 4).
6500d329 | 17724 | (define_expand "<avx512>_vpermi2var<mode>3_maskz" |
17725 | [(match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17726 | (match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
17727 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17728 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm") | |
17729 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] | |
17730 | "TARGET_AVX512BW" | |
17731 | { | |
17732 | emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( | |
17733 | operands[0], operands[1], operands[2], operands[3], | |
17734 | CONST0_RTX (<MODE>mode), operands[4])); | |
17735 | DONE; | |
17736 | }) | |
17737 | ||
;; vpermi2var insn for VI48F modes (TARGET_AVX512F): UNSPEC_VPERMI2 over
;; operand 1 (register), operand 2 (<sseintvecmode> register tied to the
;; destination by the "0" constraint) and operand 3 (register or memory).
;; Emits vpermi2<ssemodesuffix>; operand 2 does not appear in the template
;; because it shares the destination register.
17738 | (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" | |
17739 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
17740 | (unspec:VI48F | |
17741 | [(match_operand:VI48F 1 "register_operand" "v") | |
8e6b975f | 17742 | (match_operand:<sseintvecmode> 2 "register_operand" "0") |
6500d329 | 17743 | (match_operand:VI48F 3 "nonimmediate_operand" "vm")] |
8e6b975f | 17744 | UNSPEC_VPERMI2))] |
17745 | "TARGET_AVX512F" | |
9a5ea1d5 | 17746 | "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" |
8e6b975f | 17747 | [(set_attr "type" "sselog") |
17748 | (set_attr "prefix" "evex") | |
17749 | (set_attr "mode" "<sseinsnmode>")]) | |
17750 | ||
;; vpermi2var insn for VI1_AVX512VL modes (TARGET_AVX512VBMI): same RTL
;; shape and template as the VI48F pattern of the same name, with
;; operand 2 tied to the destination.
afee0628 | 17751 | (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" |
17752 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17753 | (unspec:VI1_AVX512VL | |
17754 | [(match_operand:VI1_AVX512VL 1 "register_operand" "v") | |
17755 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17756 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17757 | UNSPEC_VPERMI2))] | |
17758 | "TARGET_AVX512VBMI" | |
17759 | "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" | |
17760 | [(set_attr "type" "sselog") | |
17761 | (set_attr "prefix" "evex") | |
17762 | (set_attr "mode" "<sseinsnmode>")]) | |
17763 | ||
;; vpermi2var insn for VI2_AVX512VL modes (TARGET_AVX512BW): same RTL
;; shape and template as the VI48F pattern of the same name, with
;; operand 2 tied to the destination.
6500d329 | 17764 | (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" |
17765 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17766 | (unspec:VI2_AVX512VL | |
17767 | [(match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
17768 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17769 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17770 | UNSPEC_VPERMI2))] | |
17771 | "TARGET_AVX512BW" | |
17772 | "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" | |
17773 | [(set_attr "type" "sselog") | |
17774 | (set_attr "prefix" "evex") | |
17775 | (set_attr "mode" "<sseinsnmode>")]) | |
17776 | ||
;; Merge-masking vpermi2var insn for VI48F modes (TARGET_AVX512F):
;; vec_merge of the UNSPEC_VPERMI2_MASK result with (match_dup 0) under
;; the "Yk" mask register operand 4.  Operand 2 is tied to the
;; destination; the template prints the mask as %{%4%} after %0.
17777 | (define_insn "<avx512>_vpermi2var<mode>3_mask" | |
17778 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
17779 | (vec_merge:VI48F | |
17780 | (unspec:VI48F | |
17781 | [(match_operand:VI48F 1 "register_operand" "v") | |
5220cab6 | 17782 | (match_operand:<sseintvecmode> 2 "register_operand" "0") |
6500d329 | 17783 | (match_operand:VI48F 3 "nonimmediate_operand" "vm")] |
5220cab6 | 17784 | UNSPEC_VPERMI2_MASK) |
17785 | (match_dup 0) | |
a31e7f46 | 17786 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
5220cab6 | 17787 | "TARGET_AVX512F" |
17788 | "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17789 | [(set_attr "type" "sselog") | |
17790 | (set_attr "prefix" "evex") | |
17791 | (set_attr "mode" "<sseinsnmode>")]) | |
17792 | ||
;; Merge-masking vpermi2var insn for VI1_AVX512VL modes
;; (TARGET_AVX512VBMI): same RTL shape and template as the VI48F
;; _mask pattern, merging with (match_dup 0) under mask operand 4.
afee0628 | 17793 | (define_insn "<avx512>_vpermi2var<mode>3_mask" |
17794 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17795 | (vec_merge:VI1_AVX512VL | |
17796 | (unspec:VI1_AVX512VL | |
17797 | [(match_operand:VI1_AVX512VL 1 "register_operand" "v") | |
17798 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17799 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17800 | UNSPEC_VPERMI2_MASK) | |
17801 | (match_dup 0) | |
17802 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
17803 | "TARGET_AVX512VBMI" | |
17804 | "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17805 | [(set_attr "type" "sselog") | |
17806 | (set_attr "prefix" "evex") | |
17807 | (set_attr "mode" "<sseinsnmode>")]) | |
17808 | ||
;; Merge-masking vpermi2var insn for VI2_AVX512VL modes
;; (TARGET_AVX512BW): same RTL shape and template as the VI48F
;; _mask pattern, merging with (match_dup 0) under mask operand 4.
6500d329 | 17809 | (define_insn "<avx512>_vpermi2var<mode>3_mask" |
17810 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17811 | (vec_merge:VI2_AVX512VL | |
17812 | (unspec:VI2_AVX512VL | |
17813 | [(match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
17814 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17815 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17816 | UNSPEC_VPERMI2_MASK) | |
17817 | (match_dup 0) | |
17818 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
17819 | "TARGET_AVX512BW" | |
17820 | "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17821 | [(set_attr "type" "sselog") | |
17822 | (set_attr "prefix" "evex") | |
17823 | (set_attr "mode" "<sseinsnmode>")]) | |
17824 | ||
;; Zero-masking vpermt2var expander for VI48F modes (TARGET_AVX512F).
;; Here operand 1 is the <sseintvecmode> vector and operand 2 the VI48F
;; vector.  Forwards operands 0-3 to the _maskz_1 generator, inserting
;; CONST0_RTX (<MODE>mode) before the mask register (operand 4).
17825 | (define_expand "<avx512>_vpermt2var<mode>3_maskz" | |
17826 | [(match_operand:VI48F 0 "register_operand" "=v") | |
9a5ea1d5 | 17827 | (match_operand:<sseintvecmode> 1 "register_operand" "v") |
6500d329 | 17828 | (match_operand:VI48F 2 "register_operand" "0") |
17829 | (match_operand:VI48F 3 "nonimmediate_operand" "vm") | |
a31e7f46 | 17830 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] |
9a5ea1d5 | 17831 | "TARGET_AVX512F" |
17832 | { | |
6500d329 | 17833 | emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( |
9a5ea1d5 | 17834 | operands[0], operands[1], operands[2], operands[3], |
17835 | CONST0_RTX (<MODE>mode), operands[4])); | |
17836 | DONE; | |
17837 | }) | |
17838 | ||
;; Zero-masking vpermt2var expander for VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  Forwards operands 0-3 to the _maskz_1 generator,
;; inserting CONST0_RTX (<MODE>mode) before the mask register (operand 4).
afee0628 | 17839 | (define_expand "<avx512>_vpermt2var<mode>3_maskz" |
17840 | [(match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17841 | (match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17842 | (match_operand:VI1_AVX512VL 2 "register_operand" "0") | |
17843 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm") | |
17844 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] | |
17845 | "TARGET_AVX512VBMI" | |
17846 | { | |
17847 | emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( | |
17848 | operands[0], operands[1], operands[2], operands[3], | |
17849 | CONST0_RTX (<MODE>mode), operands[4])); | |
17850 | DONE; | |
17851 | }) | |
17852 | ||
;; Zero-masking vpermt2var expander for VI2_AVX512VL modes
;; (TARGET_AVX512BW).  Forwards operands 0-3 to the _maskz_1 generator,
;; inserting CONST0_RTX (<MODE>mode) before the mask register (operand 4).
6500d329 | 17853 | (define_expand "<avx512>_vpermt2var<mode>3_maskz" |
17854 | [(match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17855 | (match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17856 | (match_operand:VI2_AVX512VL 2 "register_operand" "0") | |
17857 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm") | |
17858 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] | |
17859 | "TARGET_AVX512BW" | |
17860 | { | |
17861 | emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( | |
17862 | operands[0], operands[1], operands[2], operands[3], | |
17863 | CONST0_RTX (<MODE>mode), operands[4])); | |
17864 | DONE; | |
17865 | }) | |
17866 | ||
;; vpermt2var insn for VI48F modes (TARGET_AVX512F): UNSPEC_VPERMT2 over
;; operand 1 (<sseintvecmode> register), operand 2 (VI48F register tied to
;; the destination by the "0" constraint) and operand 3 (register or
;; memory).  Emits vpermt2<ssemodesuffix>; operand 2 does not appear in
;; the template because it shares the destination register.
17867 | (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" | |
17868 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
17869 | (unspec:VI48F | |
8e6b975f | 17870 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") |
6500d329 | 17871 | (match_operand:VI48F 2 "register_operand" "0") |
17872 | (match_operand:VI48F 3 "nonimmediate_operand" "vm")] | |
8e6b975f | 17873 | UNSPEC_VPERMT2))] |
17874 | "TARGET_AVX512F" | |
9a5ea1d5 | 17875 | "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" |
8e6b975f | 17876 | [(set_attr "type" "sselog") |
17877 | (set_attr "prefix" "evex") | |
17878 | (set_attr "mode" "<sseinsnmode>")]) | |
ed30e0a6 | 17879 | |
;; Same UNSPEC_VPERMT2 pattern as the VI48F variant, but for VI1_AVX512VL
;; modes and gated on TARGET_AVX512VBMI.  Operand 2 is tied to the
;; destination; operand 3 may be a register or memory.
afee0628 | 17880 | (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" |
17881 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17882 | (unspec:VI1_AVX512VL | |
17883 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17884 | (match_operand:VI1_AVX512VL 2 "register_operand" "0") | |
17885 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17886 | UNSPEC_VPERMT2))] | |
17887 | "TARGET_AVX512VBMI" | |
17888 | "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" | |
17889 | [(set_attr "type" "sselog") | |
17890 | (set_attr "prefix" "evex") | |
17891 | (set_attr "mode" "<sseinsnmode>")]) | |
17892 | ||
;; Same UNSPEC_VPERMT2 pattern as the VI48F variant, but for VI2_AVX512VL
;; modes and gated on TARGET_AVX512BW.  Operand 2 is tied to the
;; destination; operand 3 may be a register or memory.
6500d329 | 17893 | (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" |
17894 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17895 | (unspec:VI2_AVX512VL | |
17896 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17897 | (match_operand:VI2_AVX512VL 2 "register_operand" "0") | |
17898 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17899 | UNSPEC_VPERMT2))] | |
17900 | "TARGET_AVX512BW" | |
17901 | "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" | |
17902 | [(set_attr "type" "sselog") | |
17903 | (set_attr "prefix" "evex") | |
17904 | (set_attr "mode" "<sseinsnmode>")]) | |
17905 | ||
;; Merge-masked vpermt2<ssemodesuffix> for VI48F modes (TARGET_AVX512F).
;; The UNSPEC_VPERMT2 result is vec_merge'd with operand 2 (match_dup 2,
;; which is also tied to the destination) under mask register operand 4;
;; the mask is printed as the {%4} suffix on the destination.
17906 | (define_insn "<avx512>_vpermt2var<mode>3_mask" | |
17907 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
17908 | (vec_merge:VI48F | |
17909 | (unspec:VI48F | |
5220cab6 | 17910 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") |
6500d329 | 17911 | (match_operand:VI48F 2 "register_operand" "0") |
17912 | (match_operand:VI48F 3 "nonimmediate_operand" "vm")] | |
5220cab6 | 17913 | UNSPEC_VPERMT2) |
17914 | (match_dup 2) | |
a31e7f46 | 17915 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
5220cab6 | 17916 | "TARGET_AVX512F" |
17917 | "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17918 | [(set_attr "type" "sselog") | |
17919 | (set_attr "prefix" "evex") | |
17920 | (set_attr "mode" "<sseinsnmode>")]) | |
17921 | ||
;; Merge-masked vpermt2<ssemodesuffix> for VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  The UNSPEC_VPERMT2 result is vec_merge'd with
;; operand 2 (match_dup 2) under mask register operand 4.
afee0628 | 17922 | (define_insn "<avx512>_vpermt2var<mode>3_mask" |
17923 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17924 | (vec_merge:VI1_AVX512VL | |
17925 | (unspec:VI1_AVX512VL | |
17926 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17927 | (match_operand:VI1_AVX512VL 2 "register_operand" "0") | |
17928 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17929 | UNSPEC_VPERMT2) | |
17930 | (match_dup 2) | |
17931 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
17932 | "TARGET_AVX512VBMI" | |
17933 | "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17934 | [(set_attr "type" "sselog") | |
17935 | (set_attr "prefix" "evex") | |
17936 | (set_attr "mode" "<sseinsnmode>")]) | |
17937 | ||
;; Merge-masked vpermt2<ssemodesuffix> for VI2_AVX512VL modes
;; (TARGET_AVX512BW).  The UNSPEC_VPERMT2 result is vec_merge'd with
;; operand 2 (match_dup 2) under mask register operand 4.
6500d329 | 17938 | (define_insn "<avx512>_vpermt2var<mode>3_mask" |
17939 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17940 | (vec_merge:VI2_AVX512VL | |
17941 | (unspec:VI2_AVX512VL | |
17942 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17943 | (match_operand:VI2_AVX512VL 2 "register_operand" "0") | |
17944 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17945 | UNSPEC_VPERMT2) | |
17946 | (match_dup 2) | |
17947 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
17948 | "TARGET_AVX512BW" | |
17949 | "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17950 | [(set_attr "type" "sselog") | |
17951 | (set_attr "prefix" "evex") | |
17952 | (set_attr "mode" "<sseinsnmode>")]) | |
17953 | ||
;; Expander for vperm2f128 on AVX256MODE2P modes with an 8-bit immediate
;; (operand 3).  When neither bit 3 nor bit 7 of the immediate is set
;; ((mask & 0x88) == 0) the operation is emitted as a vec_select from the
;; vec_concat of operands 1 and 2: bits 1:0 of the immediate pick the
;; half-vector (in units of nelt/2 elements) for the first half of the
;; result and bits 5:4 pick it for the second half.  Otherwise the
;; preparation code does nothing and the UNSPEC_VPERMIL2F128 template
;; above it is used.
f2139d4c | 17954 | (define_expand "avx_vperm2f128<mode>3" |
abd4f58b | 17955 | [(set (match_operand:AVX256MODE2P 0 "register_operand") |
f2139d4c | 17956 | (unspec:AVX256MODE2P |
abd4f58b | 17957 | [(match_operand:AVX256MODE2P 1 "register_operand") |
17958 | (match_operand:AVX256MODE2P 2 "nonimmediate_operand") | |
17959 | (match_operand:SI 3 "const_0_to_255_operand")] | |
f2139d4c | 17960 | UNSPEC_VPERMIL2F128))] |
17961 | "TARGET_AVX" | |
17962 | { | |
172d9cbe | 17963 | int mask = INTVAL (operands[3]); |
f2139d4c | 17964 | if ((mask & 0x88) == 0) |
17965 | { | |
17966 | rtx perm[<ssescalarnum>], t1, t2; | |
17967 | int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2; | |
17968 | ||
17969 | base = (mask & 3) * nelt2; | |
17970 | for (i = 0; i < nelt2; ++i) | |
17971 | perm[i] = GEN_INT (base + i); | |
17972 | ||
17973 | base = ((mask >> 4) & 3) * nelt2; | |
17974 | for (i = 0; i < nelt2; ++i) | |
17975 | perm[i + nelt2] = GEN_INT (base + i); | |
17976 | ||
63d5e521 | 17977 | t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode, |
f2139d4c | 17978 | operands[1], operands[2]); |
17979 | t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm)); | |
17980 | t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1); | |
d1f9b275 | 17981 | t2 = gen_rtx_SET (operands[0], t2); |
f2139d4c | 17982 | emit_insn (t2); |
17983 | DONE; | |
17984 | } | |
17985 | }) | |
17986 | ||
;; vperm2<i128> with an arbitrary 8-bit immediate (operand 3), kept opaque
;; as UNSPEC_VPERMIL2F128.  Operand 1 must be a register; operand 2 may be
;; a register or memory.
17987 | ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which | |
17988 | ;; means that in order to represent this properly in rtl we'd have to | |
17989 | ;; nest *another* vec_concat with a zero operand and do the select from | |
17990 | ;; a 4x wide vector. That doesn't seem very nice. | |
17991 | (define_insn "*avx_vperm2f128<mode>_full" | |
ed30e0a6 | 17992 | [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") |
17993 | (unspec:AVX256MODE2P | |
17994 | [(match_operand:AVX256MODE2P 1 "register_operand" "x") | |
17995 | (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm") | |
17996 | (match_operand:SI 3 "const_0_to_255_operand" "n")] | |
17997 | UNSPEC_VPERMIL2F128))] | |
17998 | "TARGET_AVX" | |
154d1782 | 17999 | "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
ed30e0a6 | 18000 | [(set_attr "type" "sselog") |
00a0e418 | 18001 | (set_attr "prefix_extra" "1") |
18002 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18003 | (set_attr "prefix" "vex") |
154d1782 | 18004 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 18005 | |
;; vec_select form of vperm2f128: a selection (operand 3, a parallel of
;; const_ints) from the vec_concat of operands 1 and 2.  The insn only
;; matches when avx_vperm2f128_parallel (operands[3], <MODE>mode) returns
;; nonzero; that return value minus one is used as the immediate.
;; Immediates 0x12 and 0x20 are printed as vinsert<i128> with immediate 0
;; and 1 respectively, using the %x2 form of operand 2; every other
;; immediate is printed as vperm2<i128>, after operands[3] has been
;; overwritten with the immediate.
f2139d4c | 18006 | (define_insn "*avx_vperm2f128<mode>_nozero" |
18007 | [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") | |
18008 | (vec_select:AVX256MODE2P | |
63d5e521 | 18009 | (vec_concat:<ssedoublevecmode> |
f2139d4c | 18010 | (match_operand:AVX256MODE2P 1 "register_operand" "x") |
18011 | (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")) | |
4581fd42 | 18012 | (match_parallel 3 "" |
abd4f58b | 18013 | [(match_operand 4 "const_int_operand")])))] |
4581fd42 | 18014 | "TARGET_AVX |
18015 | && avx_vperm2f128_parallel (operands[3], <MODE>mode)" | |
f2139d4c | 18016 | { |
18017 | int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1; | |
7fc62525 | 18018 | if (mask == 0x12) |
18019 | return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}"; | |
18020 | if (mask == 0x20) | |
18021 | return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}"; | |
f2139d4c | 18022 | operands[3] = GEN_INT (mask); |
154d1782 | 18023 | return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
f2139d4c | 18024 | } |
18025 | [(set_attr "type" "sselog") | |
18026 | (set_attr "prefix_extra" "1") | |
18027 | (set_attr "length_immediate" "1") | |
18028 | (set_attr "prefix" "vex") | |
154d1782 | 18029 | (set_attr "mode" "<sseinsnmode>")]) |
f2139d4c | 18030 | |
;; Matches a single-input vec_select on V_128 modes whose selection
;; parallel satisfies palignr_operand and emits it as (v)palignr.  The
;; immediate is the first selector element (operand 3) multiplied by the
;; unit size of operand 0's mode; it is stored into operands[2] before
;; printing.  Alternative 0 (noavx) ties the destination to operand 1 and
;; prints palignr; alternative 1 (avx) prints vpalignr with operand 1 used
;; for both source operands.
bafd306d | 18031 | (define_insn "*ssse3_palignr<mode>_perm" |
18032 | [(set (match_operand:V_128 0 "register_operand" "=x,x") | |
18033 | (vec_select:V_128 | |
18034 | (match_operand:V_128 1 "register_operand" "0,x") | |
18035 | (match_parallel 2 "palignr_operand" | |
18036 | [(match_operand 3 "const_int_operand" "n, n")])))] | |
18037 | "TARGET_SSSE3" | |
18038 | { | |
6e256598 | 18039 | operands[2] = |
18040 | GEN_INT (INTVAL (operands[3]) * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))); | |
bafd306d | 18041 | |
18042 | switch (which_alternative) | |
18043 | { | |
18044 | case 0: | |
18045 | return "palignr\t{%2, %1, %0|%0, %1, %2}"; | |
18046 | case 1: | |
18047 | return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}"; | |
18048 | default: | |
18049 | gcc_unreachable (); | |
18050 | } | |
18051 | } | |
18052 | [(set_attr "isa" "noavx,avx") | |
18053 | (set_attr "type" "sseishft") | |
18054 | (set_attr "atom_unit" "sishuf") | |
18055 | (set_attr "prefix_data16" "1,*") | |
18056 | (set_attr "prefix_extra" "1") | |
18057 | (set_attr "length_immediate" "1") | |
18058 | (set_attr "prefix" "orig,vex")]) | |
18059 | ||
;; Masked half-vector insert for VI48F_256 modes (TARGET_AVX512VL).
;; Operand 3 (0 or 1) selects gen_vec_set_lo_<mode>_mask or
;; gen_vec_set_hi_<mode>_mask; the chosen generator receives operands
;; 0, 1, 2, 4 and 5 (operand 3 itself is not forwarded).  Operand 5 is
;; the <avx512fmaskmode> mask register and operand 4 is a full-width
;; vector register.
4e303d3a | 18060 | (define_expand "avx512vl_vinsert<mode>" |
18061 | [(match_operand:VI48F_256 0 "register_operand") | |
18062 | (match_operand:VI48F_256 1 "register_operand") | |
18063 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") | |
18064 | (match_operand:SI 3 "const_0_to_1_operand") | |
18065 | (match_operand:VI48F_256 4 "register_operand") | |
18066 | (match_operand:<avx512fmaskmode> 5 "register_operand")] | |
18067 | "TARGET_AVX512VL" | |
18068 | { | |
18069 | rtx (*insn)(rtx, rtx, rtx, rtx, rtx); | |
18070 | ||
18071 | switch (INTVAL (operands[3])) | |
18072 | { | |
18073 | case 0: | |
18074 | insn = gen_vec_set_lo_<mode>_mask; | |
18075 | break; | |
18076 | case 1: | |
18077 | insn = gen_vec_set_hi_<mode>_mask; | |
18078 | break; | |
18079 | default: | |
18080 | gcc_unreachable (); | |
18081 | } | |
18082 | ||
18083 | emit_insn (insn (operands[0], operands[1], operands[2], operands[4], | |
18084 | operands[5])); | |
18085 | DONE; | |
18086 | }) | |
18087 | ||
;; Half-vector insert for V_256 modes (TARGET_AVX).  Operand 3 (0 or 1)
;; selects gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>, which is then
;; called with operands 0, 1 and 2.  Any other value of operand 3 hits
;; gcc_unreachable.
ed30e0a6 | 18088 | (define_expand "avx_vinsertf128<mode>" |
abd4f58b | 18089 | [(match_operand:V_256 0 "register_operand") |
18090 | (match_operand:V_256 1 "register_operand") | |
18091 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") | |
18092 | (match_operand:SI 3 "const_0_to_1_operand")] | |
ed30e0a6 | 18093 | "TARGET_AVX" |
18094 | { | |
c3fa352f | 18095 | rtx (*insn)(rtx, rtx, rtx); |
18096 | ||
ed30e0a6 | 18097 | switch (INTVAL (operands[3])) |
18098 | { | |
18099 | case 0: | |
c3fa352f | 18100 | insn = gen_vec_set_lo_<mode>; |
ed30e0a6 | 18101 | break; |
18102 | case 1: | |
c3fa352f | 18103 | insn = gen_vec_set_hi_<mode>; |
ed30e0a6 | 18104 | break; |
18105 | default: | |
18106 | gcc_unreachable (); | |
18107 | } | |
c3fa352f | 18108 | |
18109 | emit_insn (insn (operands[0], operands[1], operands[2])); | |
ed30e0a6 | 18110 | DONE; |
18111 | }) | |
18112 | ||
;; Replace the first half of a VI8F_256 vector: the result is the
;; vec_concat of operand 2 (a <ssehalfvecmode> register or memory)
;; followed by elements 2 and 3 of operand 1.  With TARGET_AVX512VL the
;; vinsert<shuffletype>64x2 mnemonic (with the optional mask suffix) is
;; printed, otherwise vinsert<i128>; both use immediate 0x0.
;; NOTE(review): the 64x2 form is selected on TARGET_AVX512VL alone while
;; the insn condition is just TARGET_AVX -- confirm whether that mnemonic
;; additionally requires TARGET_AVX512DQ.
;; NOTE(review): "prefix" is "vex" although the AVX512VL path is
;; EVEX-only -- confirm the attribute is intended.
4e303d3a | 18113 | (define_insn "vec_set_lo_<mode><mask_name>" |
18114 | [(set (match_operand:VI8F_256 0 "register_operand" "=v") | |
6fe5844b | 18115 | (vec_concat:VI8F_256 |
4e303d3a | 18116 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") |
63d5e521 | 18117 | (vec_select:<ssehalfvecmode> |
4e303d3a | 18118 | (match_operand:VI8F_256 1 "register_operand" "v") |
ed30e0a6 | 18119 | (parallel [(const_int 2) (const_int 3)]))))] |
18120 | "TARGET_AVX" | |
4e303d3a | 18121 | { |
18122 | if (TARGET_AVX512VL) | |
18123 | return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; | |
18124 | else | |
18125 | return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; | |
18126 | } | |
ed30e0a6 | 18127 | [(set_attr "type" "sselog") |
00a0e418 | 18128 | (set_attr "prefix_extra" "1") |
18129 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18130 | (set_attr "prefix" "vex") |
154d1782 | 18131 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 18132 | |
;; Replace the second half of a VI8F_256 vector: the result is the
;; vec_concat of elements 0 and 1 of operand 1 followed by operand 2
;; (a <ssehalfvecmode> register or memory).  With TARGET_AVX512VL the
;; vinsert<shuffletype>64x2 mnemonic (with the optional mask suffix) is
;; printed, otherwise vinsert<i128>; both use immediate 0x1.
;; NOTE(review): the 64x2 form is selected on TARGET_AVX512VL alone while
;; the insn condition is just TARGET_AVX -- confirm whether that mnemonic
;; additionally requires TARGET_AVX512DQ.
4e303d3a | 18133 | (define_insn "vec_set_hi_<mode><mask_name>" |
18134 | [(set (match_operand:VI8F_256 0 "register_operand" "=v") | |
6fe5844b | 18135 | (vec_concat:VI8F_256 |
63d5e521 | 18136 | (vec_select:<ssehalfvecmode> |
4e303d3a | 18137 | (match_operand:VI8F_256 1 "register_operand" "v") |
ed30e0a6 | 18138 | (parallel [(const_int 0) (const_int 1)])) |
4e303d3a | 18139 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] |
ed30e0a6 | 18140 | "TARGET_AVX" |
4e303d3a | 18141 | { |
18142 | if (TARGET_AVX512VL) | |
18143 | return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; | |
18144 | else | |
18145 | return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; | |
18146 | } | |
ed30e0a6 | 18147 | [(set_attr "type" "sselog") |
00a0e418 | 18148 | (set_attr "prefix_extra" "1") |
18149 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18150 | (set_attr "prefix" "vex") |
154d1782 | 18151 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 18152 | |
;; Replace the first half of a VI4F_256 vector: the result is the
;; vec_concat of operand 2 (a <ssehalfvecmode> register or memory)
;; followed by elements 4-7 of operand 1.  With TARGET_AVX512VL the
;; vinsert<shuffletype>32x4 mnemonic (with the optional mask suffix) is
;; printed, otherwise vinsert<i128>; both use immediate 0x0.
4e303d3a | 18153 | (define_insn "vec_set_lo_<mode><mask_name>" |
18154 | [(set (match_operand:VI4F_256 0 "register_operand" "=v") | |
6fe5844b | 18155 | (vec_concat:VI4F_256 |
4e303d3a | 18156 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") |
63d5e521 | 18157 | (vec_select:<ssehalfvecmode> |
4e303d3a | 18158 | (match_operand:VI4F_256 1 "register_operand" "v") |
ed30e0a6 | 18159 | (parallel [(const_int 4) (const_int 5) |
18160 | (const_int 6) (const_int 7)]))))] | |
18161 | "TARGET_AVX" | |
4e303d3a | 18162 | { |
18163 | if (TARGET_AVX512VL) | |
18164 | return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; | |
18165 | else | |
18166 | return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; | |
18167 | } | |
ed30e0a6 | 18168 | [(set_attr "type" "sselog") |
00a0e418 | 18169 | (set_attr "prefix_extra" "1") |
18170 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18171 | (set_attr "prefix" "vex") |
154d1782 | 18172 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 18173 | |
;; Replace the second half of a VI4F_256 vector: the result is the
;; vec_concat of elements 0-3 of operand 1 followed by operand 2
;; (a <ssehalfvecmode> register or memory).  With TARGET_AVX512VL the
;; vinsert<shuffletype>32x4 mnemonic (with the optional mask suffix) is
;; printed, otherwise vinsert<i128>; both use immediate 0x1.
4e303d3a | 18174 | (define_insn "vec_set_hi_<mode><mask_name>" |
18175 | [(set (match_operand:VI4F_256 0 "register_operand" "=v") | |
6fe5844b | 18176 | (vec_concat:VI4F_256 |
63d5e521 | 18177 | (vec_select:<ssehalfvecmode> |
4e303d3a | 18178 | (match_operand:VI4F_256 1 "register_operand" "v") |
ed30e0a6 | 18179 | (parallel [(const_int 0) (const_int 1) |
18180 | (const_int 2) (const_int 3)])) | |
4e303d3a | 18181 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] |
ed30e0a6 | 18182 | "TARGET_AVX" |
4e303d3a | 18183 | { |
18184 | if (TARGET_AVX512VL) | |
18185 | return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; | |
18186 | else | |
18187 | return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; | |
18188 | } | |
ed30e0a6 | 18189 | [(set_attr "type" "sselog") |
00a0e418 | 18190 | (set_attr "prefix_extra" "1") |
18191 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18192 | (set_attr "prefix" "vex") |
154d1782 | 18193 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 18194 | |
;; Replace the first half of a V16HI vector: the result is the vec_concat
;; of V8HI operand 2 (register or memory) followed by elements 8-15 of
;; operand 1.  Printed as vinsert%~128 with immediate 0x0.
18195 | (define_insn "vec_set_lo_v16hi" | |
18196 | [(set (match_operand:V16HI 0 "register_operand" "=x") | |
18197 | (vec_concat:V16HI | |
18198 | (match_operand:V8HI 2 "nonimmediate_operand" "xm") | |
18199 | (vec_select:V8HI | |
18200 | (match_operand:V16HI 1 "register_operand" "x") | |
18201 | (parallel [(const_int 8) (const_int 9) | |
18202 | (const_int 10) (const_int 11) | |
18203 | (const_int 12) (const_int 13) | |
18204 | (const_int 14) (const_int 15)]))))] | |
18205 | "TARGET_AVX" | |
154d1782 | 18206 | "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" |
ed30e0a6 | 18207 | [(set_attr "type" "sselog") |
00a0e418 | 18208 | (set_attr "prefix_extra" "1") |
18209 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18210 | (set_attr "prefix" "vex") |
154d1782 | 18211 | (set_attr "mode" "OI")]) |
ed30e0a6 | 18212 | |
;; Replace the second half of a V16HI vector: the result is the
;; vec_concat of elements 0-7 of operand 1 followed by V8HI operand 2
;; (register or memory).  Printed as vinsert%~128 with immediate 0x1.
18213 | (define_insn "vec_set_hi_v16hi" | |
18214 | [(set (match_operand:V16HI 0 "register_operand" "=x") | |
18215 | (vec_concat:V16HI | |
18216 | (vec_select:V8HI | |
18217 | (match_operand:V16HI 1 "register_operand" "x") | |
18218 | (parallel [(const_int 0) (const_int 1) | |
18219 | (const_int 2) (const_int 3) | |
18220 | (const_int 4) (const_int 5) | |
18221 | (const_int 6) (const_int 7)])) | |
18222 | (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] | |
18223 | "TARGET_AVX" | |
154d1782 | 18224 | "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" |
ed30e0a6 | 18225 | [(set_attr "type" "sselog") |
00a0e418 | 18226 | (set_attr "prefix_extra" "1") |
18227 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18228 | (set_attr "prefix" "vex") |
154d1782 | 18229 | (set_attr "mode" "OI")]) |
ed30e0a6 | 18230 | |
;; Replace the first half of a V32QI vector: the result is the vec_concat
;; of V16QI operand 2 (register or memory) followed by elements 16-31 of
;; operand 1.  Printed as vinsert%~128 with immediate 0x0.
18231 | (define_insn "vec_set_lo_v32qi" | |
18232 | [(set (match_operand:V32QI 0 "register_operand" "=x") | |
18233 | (vec_concat:V32QI | |
18234 | (match_operand:V16QI 2 "nonimmediate_operand" "xm") | |
18235 | (vec_select:V16QI | |
18236 | (match_operand:V32QI 1 "register_operand" "x") | |
18237 | (parallel [(const_int 16) (const_int 17) | |
18238 | (const_int 18) (const_int 19) | |
18239 | (const_int 20) (const_int 21) | |
18240 | (const_int 22) (const_int 23) | |
18241 | (const_int 24) (const_int 25) | |
18242 | (const_int 26) (const_int 27) | |
18243 | (const_int 28) (const_int 29) | |
18244 | (const_int 30) (const_int 31)]))))] | |
18245 | "TARGET_AVX" | |
154d1782 | 18246 | "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" |
ed30e0a6 | 18247 | [(set_attr "type" "sselog") |
00a0e418 | 18248 | (set_attr "prefix_extra" "1") |
18249 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18250 | (set_attr "prefix" "vex") |
154d1782 | 18251 | (set_attr "mode" "OI")]) |
ed30e0a6 | 18252 | |
;; Replace the second half of a V32QI vector: the result is the
;; vec_concat of elements 0-15 of operand 1 followed by V16QI operand 2
;; (register or memory).  Printed as vinsert%~128 with immediate 0x1.
18253 | (define_insn "vec_set_hi_v32qi" | |
18254 | [(set (match_operand:V32QI 0 "register_operand" "=x") | |
18255 | (vec_concat:V32QI | |
18256 | (vec_select:V16QI | |
18257 | (match_operand:V32QI 1 "register_operand" "x") | |
18258 | (parallel [(const_int 0) (const_int 1) | |
18259 | (const_int 2) (const_int 3) | |
18260 | (const_int 4) (const_int 5) | |
18261 | (const_int 6) (const_int 7) | |
18262 | (const_int 8) (const_int 9) | |
18263 | (const_int 10) (const_int 11) | |
18264 | (const_int 12) (const_int 13) | |
18265 | (const_int 14) (const_int 15)])) | |
18266 | (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] | |
18267 | "TARGET_AVX" | |
154d1782 | 18268 | "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" |
ed30e0a6 | 18269 | [(set_attr "type" "sselog") |
00a0e418 | 18270 | (set_attr "prefix_extra" "1") |
18271 | (set_attr "length_immediate" "1") | |
ed30e0a6 | 18272 | (set_attr "prefix" "vex") |
154d1782 | 18273 | (set_attr "mode" "OI")]) |
ed30e0a6 | 18274 | |
;; Masked vector load for V48_AVX2 modes (TARGET_AVX), modelled as
;; UNSPEC_MASKMOV.  Operand 1 is the memory source and operand 2 is the
;; <sseintvecmode> mask, held in a vector ("x") register.  Printed as
;; v<sseintprefix>maskmov<ssemodesuffix>.
c512f3a4 | 18275 | (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>" |
18276 | [(set (match_operand:V48_AVX2 0 "register_operand" "=x") | |
5deb404d | 18277 | (unspec:V48_AVX2 |
c512f3a4 | 18278 | [(match_operand:<sseintvecmode> 2 "register_operand" "x") |
18279 | (match_operand:V48_AVX2 1 "memory_operand" "m")] | |
458af25e | 18280 | UNSPEC_MASKMOV))] |
18281 | "TARGET_AVX" | |
c512f3a4 | 18282 | "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" |
5deb404d | 18283 | [(set_attr "type" "sselog1") |
18284 | (set_attr "prefix_extra" "1") | |
18285 | (set_attr "prefix" "vex") | |
6470d004 | 18286 | (set_attr "btver2_decode" "vector") |
5deb404d | 18287 | (set_attr "mode" "<sseinsnmode>")]) |
18288 | ||
;; Masked vector store for V48_AVX2 modes (TARGET_AVX), modelled as
;; UNSPEC_MASKMOV.  Operand 0 is the memory destination; it is declared
;; read-write ("+m") and also appears as an input via (match_dup 0).
;; Operand 1 is the <sseintvecmode> mask register and operand 2 the data
;; register.
c512f3a4 | 18289 | (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>" |
ba11bab4 | 18290 | [(set (match_operand:V48_AVX2 0 "memory_operand" "+m") |
c512f3a4 | 18291 | (unspec:V48_AVX2 |
458af25e | 18292 | [(match_operand:<sseintvecmode> 1 "register_operand" "x") |
c512f3a4 | 18293 | (match_operand:V48_AVX2 2 "register_operand" "x") |
dcab66ec | 18294 | (match_dup 0)] |
18295 | UNSPEC_MASKMOV))] | |
458af25e | 18296 | "TARGET_AVX" |
c512f3a4 | 18297 | "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
ed30e0a6 | 18298 | [(set_attr "type" "sselog1") |
00a0e418 | 18299 | (set_attr "prefix_extra" "1") |
ed30e0a6 | 18300 | (set_attr "prefix" "vex") |
6470d004 | 18301 | (set_attr "btver2_decode" "vector") |
c512f3a4 | 18302 | (set_attr "mode" "<sseinsnmode>")]) |
ed30e0a6 | 18303 | |
;; maskload expander for V48_AVX2 modes with a vector-mode mask
;; (<sseintvecmode>, operand 2).  It has no preparation code and simply
;; generates the UNSPEC_MASKMOV load shown in its template (TARGET_AVX).
cf5489c1 | 18304 | (define_expand "maskload<mode><sseintvecmodelower>" |
c71d3c24 | 18305 | [(set (match_operand:V48_AVX2 0 "register_operand") |
18306 | (unspec:V48_AVX2 | |
18307 | [(match_operand:<sseintvecmode> 2 "register_operand") | |
18308 | (match_operand:V48_AVX2 1 "memory_operand")] | |
18309 | UNSPEC_MASKMOV))] | |
18310 | "TARGET_AVX") | |
18311 | ||
;; maskload expander for V48_AVX512VL modes with an <avx512fmaskmode>
;; mask register (operand 2), enabled by TARGET_AVX512F.  The load is
;; expressed as a vec_merge of memory operand 1 with the destination's
;; previous value (match_dup 0) under the mask.
cf5489c1 | 18312 | (define_expand "maskload<mode><avx512fmaskmodelower>" |
18313 | [(set (match_operand:V48_AVX512VL 0 "register_operand") | |
18314 | (vec_merge:V48_AVX512VL | |
18315 | (match_operand:V48_AVX512VL 1 "memory_operand") | |
18316 | (match_dup 0) | |
18317 | (match_operand:<avx512fmaskmode> 2 "register_operand")))] | |
18318 | "TARGET_AVX512F") | |
18319 | ||
;; maskload expander for VI12_AVX512VL modes with an <avx512fmaskmode>
;; mask register (operand 2), enabled by TARGET_AVX512BW.  The load is
;; expressed as a vec_merge of memory operand 1 with the destination's
;; previous value (match_dup 0) under the mask.
18320 | (define_expand "maskload<mode><avx512fmaskmodelower>" | |
18321 | [(set (match_operand:VI12_AVX512VL 0 "register_operand") | |
18322 | (vec_merge:VI12_AVX512VL | |
18323 | (match_operand:VI12_AVX512VL 1 "memory_operand") | |
18324 | (match_dup 0) | |
18325 | (match_operand:<avx512fmaskmode> 2 "register_operand")))] | |
18326 | "TARGET_AVX512BW") | |
18327 | ||
;; maskstore expander for V48_AVX2 modes with a vector-mode mask
;; (<sseintvecmode>, operand 2).  It has no preparation code and simply
;; generates the UNSPEC_MASKMOV store shown in its template, in which the
;; memory destination also appears as an input (match_dup 0).
18328 | (define_expand "maskstore<mode><sseintvecmodelower>" | |
c71d3c24 | 18329 | [(set (match_operand:V48_AVX2 0 "memory_operand") |
18330 | (unspec:V48_AVX2 | |
18331 | [(match_operand:<sseintvecmode> 2 "register_operand") | |
18332 | (match_operand:V48_AVX2 1 "register_operand") | |
18333 | (match_dup 0)] | |
18334 | UNSPEC_MASKMOV))] | |
18335 | "TARGET_AVX") | |
18336 | ||
;; maskstore expander for V48_AVX512VL modes with an <avx512fmaskmode>
;; mask register (operand 2), enabled by TARGET_AVX512F.  The store is
;; expressed as a vec_merge of register operand 1 with the memory
;; destination's previous contents (match_dup 0) under the mask.
cf5489c1 | 18337 | (define_expand "maskstore<mode><avx512fmaskmodelower>" |
18338 | [(set (match_operand:V48_AVX512VL 0 "memory_operand") | |
18339 | (vec_merge:V48_AVX512VL | |
18340 | (match_operand:V48_AVX512VL 1 "register_operand") | |
18341 | (match_dup 0) | |
18342 | (match_operand:<avx512fmaskmode> 2 "register_operand")))] | |
18343 | "TARGET_AVX512F") | |
18344 | ||
;; maskstore expander for VI12_AVX512VL modes with an <avx512fmaskmode>
;; mask register (operand 2), enabled by TARGET_AVX512BW.  The store is
;; expressed as a vec_merge of register operand 1 with the memory
;; destination's previous contents (match_dup 0) under the mask.
18345 | (define_expand "maskstore<mode><avx512fmaskmodelower>" | |
18346 | [(set (match_operand:VI12_AVX512VL 0 "memory_operand") | |
18347 | (vec_merge:VI12_AVX512VL | |
18348 | (match_operand:VI12_AVX512VL 1 "register_operand") | |
18349 | (match_dup 0) | |
18350 | (match_operand:<avx512fmaskmode> 2 "register_operand")))] | |
18351 | "TARGET_AVX512BW") | |
18352 | ||
;; Cast of a <ssehalfvecmode> value to an AVX256MODE2P value (UNSPEC_CAST).
;; The insn prints "#" and is split once reload has completed into a
;; single move.  When the destination is a register it is re-expressed as
;; a <ssehalfvecmode> REG with the same register number; otherwise (the
;; memory-destination alternative, whose source is a register) the source
;; is re-expressed as a <MODE>mode REG with the same register number.
675d6e0d | 18353 | (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>" |
3d1a0207 | 18354 | [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m") |
ed30e0a6 | 18355 | (unspec:AVX256MODE2P |
63d5e521 | 18356 | [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")] |
ed30e0a6 | 18357 | UNSPEC_CAST))] |
18358 | "TARGET_AVX" | |
3d1a0207 | 18359 | "#" |
18360 | "&& reload_completed" | |
18361 | [(const_int 0)] | |
ed30e0a6 | 18362 | { |
03f65847 | 18363 | rtx op0 = operands[0]; |
3d1a0207 | 18364 | rtx op1 = operands[1]; |
03f65847 | 18365 | if (REG_P (op0)) |
18366 | op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0)); | |
33541f98 | 18367 | else |
3d1a0207 | 18368 | op1 = gen_rtx_REG (<MODE>mode, REGNO (op1)); |
03f65847 | 18369 | emit_move_insn (op0, op1); |
3d1a0207 | 18370 | DONE; |
18371 | }) | |
ed30e0a6 | 18372 | |
;; vec_init expander for V_256 modes (TARGET_AVX).  All work is delegated
;; to ix86_expand_vector_init, called with false as its first argument,
;; the destination (operand 0) and the initializer (operand 1).
18373 | (define_expand "vec_init<mode>" | |
abd4f58b | 18374 | [(match_operand:V_256 0 "register_operand") |
18375 | (match_operand 1)] | |
ed30e0a6 | 18376 | "TARGET_AVX" |
18377 | { | |
18378 | ix86_expand_vector_init (false, operands[0], operands[1]); | |
18379 | DONE; | |
18380 | }) | |
18381 | ||
;; vec_init expander for VF48_I1248 modes (TARGET_AVX512F).  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, the destination (operand 0) and the initializer (operand 1).
8e6b975f | 18382 | (define_expand "vec_init<mode>" |
da2989a5 | 18383 | [(match_operand:VF48_I1248 0 "register_operand") |
8e6b975f | 18384 | (match_operand 1)] |
18385 | "TARGET_AVX512F" | |
18386 | { | |
18387 | ix86_expand_vector_init (false, operands[0], operands[1]); | |
18388 | DONE; | |
18389 | }) | |
18390 | ||
;; Arithmetic right shift (ashiftrt) of operand 1 by the vector of counts
;; in operand 2, for VI48_AVX512F_AVX512VL modes.  Printed as
;; vpsrav<ssemodesuffix> with the optional mask suffix; enabled when
;; TARGET_AVX2 and <mask_mode512bit_condition> hold.
fcb19554 | 18391 | (define_insn "<avx2_avx512>_ashrv<mode><mask_name>" |
7da26bee | 18392 | [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v") |
18393 | (ashiftrt:VI48_AVX512F_AVX512VL | |
18394 | (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v") | |
18395 | (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
5220cab6 | 18396 | "TARGET_AVX2 && <mask_mode512bit_condition>" |
18397 | "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 18398 | [(set_attr "type" "sseishft") |
f062acd7 | 18399 | (set_attr "prefix" "maybe_evex") |
7d079352 | 18400 | (set_attr "mode" "<sseinsnmode>")]) |
5deb404d | 18401 | |
;; Arithmetic right shift (ashiftrt) of operand 1 by the vector of counts
;; in operand 2, for VI2_AVX512VL modes.  Printed as vpsravw with the
;; optional mask suffix; enabled by TARGET_AVX512BW.
fcb19554 | 18402 | (define_insn "<avx2_avx512>_ashrv<mode><mask_name>" |
7da26bee | 18403 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") |
18404 | (ashiftrt:VI2_AVX512VL | |
18405 | (match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
18406 | (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
18407 | "TARGET_AVX512BW" | |
18408 | "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
18409 | [(set_attr "type" "sseishft") | |
18410 | (set_attr "prefix" "maybe_evex") | |
18411 | (set_attr "mode" "<sseinsnmode>")]) | |
18412 | ||
;; Per-element shift of operand 1 by the vector of counts in operand 2,
;; iterated over the any_lshift codes, for VI48_AVX512F modes.  Printed as
;; vp<vshift>v<ssemodesuffix> with the optional mask suffix; enabled when
;; TARGET_AVX2 and <mask_mode512bit_condition> hold.
fcb19554 | 18413 | (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>" |
db3a6e9c | 18414 | [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v") |
18415 | (any_lshift:VI48_AVX512F | |
18416 | (match_operand:VI48_AVX512F 1 "register_operand" "v") | |
18417 | (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))] | |
5220cab6 | 18418 | "TARGET_AVX2 && <mask_mode512bit_condition>" |
18419 | "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
5deb404d | 18420 | [(set_attr "type" "sseishft") |
f062acd7 | 18421 | (set_attr "prefix" "maybe_evex") |
5deb404d | 18422 | (set_attr "mode" "<sseinsnmode>")]) |
db3a6e9c | 18423 | |
;; Per-element shift of operand 1 by the vector of counts in operand 2,
;; iterated over the any_lshift codes, for VI2_AVX512VL modes.  Printed as
;; vp<vshift>v<ssemodesuffix> with the optional mask suffix; enabled by
;; TARGET_AVX512BW.
fcb19554 | 18424 | (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>" |
db3a6e9c | 18425 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") |
18426 | (any_lshift:VI2_AVX512VL | |
18427 | (match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
18428 | (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
18429 | "TARGET_AVX512BW" | |
18430 | "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
18431 | [(set_attr "type" "sseishft") | |
18432 | (set_attr "prefix" "maybe_evex") | |
18433 | (set_attr "mode" "<sseinsnmode>")]) | |
5deb404d | 18434 | |
;; Concatenate two <ssehalfvecmode> values into a V_256_512 vector.
;; Alternative 0 (operand 2 in a register or memory) prints vinsert<i128>
;; with immediate 0x1, inserting operand 2 into the <concat_tg_mode> view
;; of operand 1.  Alternative 1 (operand 2 matches constraint "C") prints
;; a plain vmovaps/vmovapd/vmovdqa of operand 1 into the %t0 or %x0 view
;; of the destination, chosen by the insn's "mode" attribute.
12cbfa26 | 18435 | (define_insn "avx_vec_concat<mode>" |
6615b722 | 18436 | [(set (match_operand:V_256_512 0 "register_operand" "=x,x") |
18437 | (vec_concat:V_256_512 | |
63d5e521 | 18438 | (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x") |
18439 | (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))] | |
ed30e0a6 | 18440 | "TARGET_AVX" |
18441 | { | |
18442 | switch (which_alternative) | |
18443 | { | |
18444 | case 0: | |
6615b722 | 18445 | return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}"; |
ed30e0a6 | 18446 | case 1: |
18447 | switch (get_attr_mode (insn)) | |
5deb404d | 18448 | { |
6615b722 | 18449 | case MODE_V16SF: |
18450 | return "vmovaps\t{%1, %t0|%t0, %1}"; | |
18451 | case MODE_V8DF: | |
18452 | return "vmovapd\t{%1, %t0|%t0, %1}"; | |
ed30e0a6 | 18453 | case MODE_V8SF: |
18454 | return "vmovaps\t{%1, %x0|%x0, %1}"; | |
18455 | case MODE_V4DF: | |
18456 | return "vmovapd\t{%1, %x0|%x0, %1}"; | |
6615b722 | 18457 | case MODE_XI: |
18458 | return "vmovdqa\t{%1, %t0|%t0, %1}"; | |
18459 | case MODE_OI: | |
ed30e0a6 | 18460 | return "vmovdqa\t{%1, %x0|%x0, %1}"; |
6615b722 | 18461 | default: |
18462 | gcc_unreachable (); | |
ed30e0a6 | 18463 | } |
18464 | default: | |
18465 | gcc_unreachable (); | |
18466 | } | |
18467 | } | |
18468 | [(set_attr "type" "sselog,ssemov") | |
00a0e418 | 18469 | (set_attr "prefix_extra" "1,*") |
18470 | (set_attr "length_immediate" "1,*") | |
6615b722 | 18471 | (set_attr "prefix" "maybe_evex") |
63d5e521 | 18472 | (set_attr "mode" "<sseinsnmode>")]) |
ec113e67 | 18473 | |
;; vcvtph2ps from a V8HI register to V4SF.  The conversion is modelled as
;; a V8SF UNSPEC_VCVTPH2PS of operand 1 from which elements 0-3 are
;; selected.  Enabled by TARGET_F16C or TARGET_AVX512VL; <mask_name>
;; supplies the optional mask suffix.
af00a412 | 18474 | (define_insn "vcvtph2ps<mask_name>" |
18475 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
ec113e67 | 18476 | (vec_select:V4SF |
af00a412 | 18477 | (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")] |
ec113e67 | 18478 | UNSPEC_VCVTPH2PS) |
18479 | (parallel [(const_int 0) (const_int 1) | |
30f58bc8 | 18480 | (const_int 2) (const_int 3)])))] |
af00a412 | 18481 | "TARGET_F16C || TARGET_AVX512VL" |
18482 | "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ec113e67 | 18483 | [(set_attr "type" "ssecvt") |
af00a412 | 18484 | (set_attr "prefix" "maybe_evex") |
ec113e67 | 18485 | (set_attr "mode" "V4SF")]) |
18486 | ||
;; vcvtph2ps from a V4HI memory operand to a V4SF register, modelled as
;; UNSPEC_VCVTPH2PS.  Enabled by TARGET_F16C or TARGET_AVX512VL;
;; <mask_name> supplies the optional mask suffix.
;; The "prefix" attribute is "maybe_evex", matching the register form of
;; this instruction: the destination uses the "v" constraint and the
;; template can carry a mask suffix, so the insn is not always
;; VEX-encoded.
;; NOTE(review): "mode" is V8SF here while the register form uses V4SF --
;; confirm which is intended.
af00a412 | 18487 | (define_insn "*vcvtph2ps_load<mask_name>" |
18488 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
ec113e67 | 18489 | (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")] |
18490 | UNSPEC_VCVTPH2PS))] | |
af00a412 | 18491 | "TARGET_F16C || TARGET_AVX512VL" |
18492 | "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ec113e67 | 18493 | [(set_attr "type" "ssecvt") |
18494 | (set_attr "prefix" "maybe_evex") | |
18495 | (set_attr "mode" "V8SF")]) | |
18496 | ||
;; vcvtph2ps from a V8HI register or memory operand to a V8SF register,
;; modelled as UNSPEC_VCVTPH2PS.  Enabled by TARGET_F16C or
;; TARGET_AVX512VL; <mask_name> supplies the optional mask suffix.
;; The "prefix" attribute is "maybe_evex", matching the 128-bit register
;; form of this instruction: the operands use the "v" constraint and the
;; template can carry a mask suffix, so the insn is not always
;; VEX-encoded.
af00a412 | 18497 | (define_insn "vcvtph2ps256<mask_name>" |
18498 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
18499 | (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")] | |
ec113e67 | 18500 | UNSPEC_VCVTPH2PS))] |
af00a412 | 18501 | "TARGET_F16C || TARGET_AVX512VL" |
18502 | "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
ec113e67 | 18503 | [(set_attr "type" "ssecvt") |
18504 | (set_attr "prefix" "maybe_evex") | |
6470d004 | 18505 | (set_attr "btver2_decode" "double") |
ec113e67 | 18506 | (set_attr "mode" "V8SF")]) |
18507 | ||
;; vcvtph2ps from V16HI to V16SF (TARGET_AVX512F), modelled as
;; UNSPEC_VCVTPH2PS.  The predicate and constraint of operand 1 and the
;; extra template operand come from the round_saeonly substitutions;
;; <mask_name> supplies the optional mask suffix.
dbfe84d5 | 18508 | (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>" |
697a43f8 | 18509 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
5220cab6 | 18510 | (unspec:V16SF |
dbfe84d5 | 18511 | [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
5220cab6 | 18512 | UNSPEC_VCVTPH2PS))] |
697a43f8 | 18513 | "TARGET_AVX512F" |
dbfe84d5 | 18514 | "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
697a43f8 | 18515 | [(set_attr "type" "ssecvt") |
18516 | (set_attr "prefix" "evex") | |
18517 | (set_attr "mode" "V16SF")]) | |
18518 | ||
;; Masked vcvtps2ph expander (TARGET_AVX512VL).  The V4HI
;; UNSPEC_VCVTPS2PH result of operand 1 with immediate operand 2 is
;; concatenated with a zero V4HI vector (operand 5, set by the preparation
;; statement) and then vec_merge'd with operand 3 under the QImode mask in
;; operand 4.
af00a412 | 18519 | (define_expand "vcvtps2ph_mask" |
18520 | [(set (match_operand:V8HI 0 "register_operand") | |
18521 | (vec_merge:V8HI | |
18522 | (vec_concat:V8HI | |
18523 | (unspec:V4HI [(match_operand:V4SF 1 "register_operand") | |
18524 | (match_operand:SI 2 "const_0_to_255_operand")] | |
18525 | UNSPEC_VCVTPS2PH) | |
18526 | (match_dup 5)) | |
18527 | (match_operand:V8HI 3 "vector_move_operand") | |
18528 | (match_operand:QI 4 "register_operand")))] | |
18529 | "TARGET_AVX512VL" | |
18530 | "operands[5] = CONST0_RTX (V4HImode);") | |
18531 | ||
;; Unmasked vcvtps2ph expander (TARGET_F16C).  The V4HI UNSPEC_VCVTPS2PH
;; result of operand 1 with immediate operand 2 is concatenated with a
;; zero V4HI vector (operand 3, set by the preparation statement) to form
;; the V8HI destination.
ec113e67 | 18532 | (define_expand "vcvtps2ph" |
abd4f58b | 18533 | [(set (match_operand:V8HI 0 "register_operand") |
ec113e67 | 18534 | (vec_concat:V8HI |
abd4f58b | 18535 | (unspec:V4HI [(match_operand:V4SF 1 "register_operand") |
18536 | (match_operand:SI 2 "const_0_to_255_operand")] | |
ec113e67 | 18537 | UNSPEC_VCVTPS2PH) |
18538 | (match_dup 3)))] | |
18539 | "TARGET_F16C" | |
18540 | "operands[3] = CONST0_RTX (V4HImode);") | |
18541 | ||
;; Register-destination vcvtps2ph: the V4HI UNSPEC_VCVTPS2PH result of
;; operand 1 (with 8-bit immediate operand 2) concatenated with a
;; const0_operand V4HI (operand 3).  Enabled when TARGET_F16C or
;; TARGET_AVX512VL holds together with <mask_avx512vl_condition>.
af00a412 | 18542 | (define_insn "*vcvtps2ph<mask_name>" |
18543 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
ec113e67 | 18544 | (vec_concat:V8HI |
af00a412 | 18545 | (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v") |
e16e10c8 | 18546 | (match_operand:SI 2 "const_0_to_255_operand" "N")] |
ec113e67 | 18547 | UNSPEC_VCVTPS2PH) |
abd4f58b | 18548 | (match_operand:V4HI 3 "const0_operand")))] |
fd1fee28 | 18549 | "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>" |
af00a412 | 18550 | "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}" |
ec113e67 | 18551 | [(set_attr "type" "ssecvt") |
af00a412 | 18552 | (set_attr "prefix" "maybe_evex") |
ec113e67 | 18553 | (set_attr "mode" "V4SF")]) |
18554 | ||
;; Memory-destination vcvtps2ph: stores the V4HI UNSPEC_VCVTPS2PH result
;; of V4SF register operand 1 (with 8-bit immediate operand 2) to memory.
;; Enabled by TARGET_F16C or TARGET_AVX512VL.  The source register uses
;; the "x" constraint.
af00a412 | 18555 | (define_insn "*vcvtps2ph_store<mask_name>" |
ec113e67 | 18556 | [(set (match_operand:V4HI 0 "memory_operand" "=m") |
18557 | (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x") | |
e16e10c8 | 18558 | (match_operand:SI 2 "const_0_to_255_operand" "N")] |
ec113e67 | 18559 | UNSPEC_VCVTPS2PH))] |
af00a412 | 18560 | "TARGET_F16C || TARGET_AVX512VL" |
18561 | "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
ec113e67 | 18562 | [(set_attr "type" "ssecvt") |
af00a412 | 18563 | (set_attr "prefix" "maybe_evex") |
ec113e67 | 18564 | (set_attr "mode" "V4SF")]) |
18565 | ||
;; 256-bit vcvtps2ph: converts the V8SF register operand 1 into a V8HI | | |
;; destination that may be a register or memory ("=xm"). | | |
af00a412 | 18566 | (define_insn "vcvtps2ph256<mask_name>" |
ec113e67 | 18567 | [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm") |
18568 | (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x") | |
e16e10c8 | 18569 | (match_operand:SI 2 "const_0_to_255_operand" "N")] |
ec113e67 | 18570 | UNSPEC_VCVTPS2PH))] |
af00a412 | 18571 | "TARGET_F16C || TARGET_AVX512VL" |
18572 | "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
ec113e67 | 18573 | [(set_attr "type" "ssecvt") |
af00a412 | 18574 | (set_attr "prefix" "maybe_evex") |
6470d004 | 18575 | (set_attr "btver2_decode" "vector") |
ec113e67 | 18576 | (set_attr "mode" "V8SF")]) |
5deb404d | 18577 | |
;; 512-bit vcvtps2ph (condition TARGET_AVX512F): converts the V16SF register | | |
;; operand 1 into a V16HI destination that may be a register or memory. | | |
5220cab6 | 18578 | (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>" |
697a43f8 | 18579 | [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm") |
5220cab6 | 18580 | (unspec:V16HI |
18581 | [(match_operand:V16SF 1 "register_operand" "v") | |
18582 | (match_operand:SI 2 "const_0_to_255_operand" "N")] | |
18583 | UNSPEC_VCVTPS2PH))] | |
697a43f8 | 18584 | "TARGET_AVX512F" |
5220cab6 | 18585 | "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
697a43f8 | 18586 | [(set_attr "type" "ssecvt") |
18587 | (set_attr "prefix" "evex") | |
18588 | (set_attr "mode" "V16SF")]) | |
18589 | ||
5deb404d | 18590 | ;; For gather* insn patterns |
;; VEC_GATHER_MODE lists the data modes over which the avx2_gather* patterns | | |
;; are instantiated. | | |
18591 | (define_mode_iterator VEC_GATHER_MODE | |
18592 | [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF]) | |
;; Maps a data mode to the mode of the vector register operand that appears | | |
;; inside the VSIB address of the gathersi/scattersi patterns. | | |
16dfb112 | 18593 | (define_mode_attr VEC_GATHER_IDXSI |
8e6b975f | 18594 | [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI") |
18595 | (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI") | |
18596 | (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI") | |
18597 | (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")]) | |
18598 | ||
;; Maps a data mode to the mode of the vector register operand that appears | | |
;; inside the VSIB address of the gatherdi/scatterdi patterns. | | |
16dfb112 | 18599 | (define_mode_attr VEC_GATHER_IDXDI |
8e6b975f | 18600 | [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI") |
18601 | (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI") | |
18602 | (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI") | |
18603 | (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")]) | |
18604 | ||
;; Maps a data mode to the mode used for the register data operands of the | | |
;; gatherdi/scatterdi patterns.  It is the identity for the DI/DF modes and | | |
;; for V4SI/V4SF; V8SI/V8SF and V16SI/V16SF map to the vector mode with half | | |
;; as many elements. | | |
16dfb112 | 18605 | (define_mode_attr VEC_GATHER_SRCDI |
8e6b975f | 18606 | [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI") |
18607 | (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF") | |
18608 | (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI") | |
18609 | (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")]) | |
5deb404d | 18610 | |
;; Expander for AVX2 gathers whose VSIB vector register has mode | | |
;; <VEC_GATHER_IDXSI>.  The preparation statement wraps operands 2, 3 and 5 | | |
;; in a Pmode UNSPEC_VSIBADDR and stores it as operand 7, the address of the | | |
;; gathered memory.  Operand 6 is a scratch in the data mode that the | | |
;; pattern clobbers.  The predicate of operand 5 is spelled without a | | |
;; trailing blank so it matches the name used by the insn patterns. | | |
18611 | (define_expand "avx2_gathersi<mode>" | |
abd4f58b | 18612 | [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") |
a24c6b3c | 18613 | (unspec:VEC_GATHER_MODE |
abd4f58b | 18614 | [(match_operand:VEC_GATHER_MODE 1 "register_operand") |
de084923 | 18615 | (mem:<ssescalarmode> |
18616 | (match_par_dup 7 | |
abd4f58b | 18617 | [(match_operand 2 "vsib_address_operand") |
16dfb112 | 18618 | (match_operand:<VEC_GATHER_IDXSI> |
abd4f58b | 18619 | 3 "register_operand") |
18620 | (match_operand:SI 5 "const1248_operand")])) | |
a24c6b3c | 18621 | (mem:BLK (scratch)) |
abd4f58b | 18622 | (match_operand:VEC_GATHER_MODE 4 "register_operand")] |
a24c6b3c | 18623 | UNSPEC_GATHER)) |
abd4f58b | 18624 | (clobber (match_scratch:VEC_GATHER_MODE 6))])] |
de084923 | 18625 | "TARGET_AVX2" |
18626 | { | |
18627 | operands[7] | |
18628 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], | |
18629 | operands[5]), UNSPEC_VSIBADDR); | |
18630 | }) | |
5deb404d | 18631 | |
;; Insn matched for the avx2_gathersi expansion.  Operand 2 is tied to the | | |
;; destination (constraint "0") and operand 5 to the clobbered scratch | | |
;; operand 1 (constraint "1"); both outputs are early-clobber ("=&x"). | | |
;; The template prints the scratch, the VSIB memory operand 7 and the | | |
;; destination. | | |
18632 | (define_insn "*avx2_gathersi<mode>" | |
a24c6b3c | 18633 | [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") |
5deb404d | 18634 | (unspec:VEC_GATHER_MODE |
a24c6b3c | 18635 | [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0") |
de084923 | 18636 | (match_operator:<ssescalarmode> 7 "vsib_mem_operator" |
18637 | [(unspec:P | |
1e662e65 | 18638 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
16dfb112 | 18639 | (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x") |
de084923 | 18640 | (match_operand:SI 6 "const1248_operand" "n")] |
18641 | UNSPEC_VSIBADDR)]) | |
a24c6b3c | 18642 | (mem:BLK (scratch)) |
de084923 | 18643 | (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")] |
a24c6b3c | 18644 | UNSPEC_GATHER)) |
18645 | (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] | |
5deb404d | 18646 | "TARGET_AVX2" |
de084923 | 18647 | "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}" |
5deb404d | 18648 | [(set_attr "type" "ssemov") |
18649 | (set_attr "prefix" "vex") | |
18650 | (set_attr "mode" "<sseinsnmode>")]) | |
18651 | ||
;; Variant of *avx2_gathersi<mode> in which the first unspec input is (pc) | | |
;; instead of a register tied to the destination; the remaining operands | | |
;; are renumbered accordingly (VSIB memory is operand 6 here). | | |
6296bd96 | 18652 | (define_insn "*avx2_gathersi<mode>_2" |
18653 | [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") | |
18654 | (unspec:VEC_GATHER_MODE | |
18655 | [(pc) | |
18656 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18657 | [(unspec:P | |
1e662e65 | 18658 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
6296bd96 | 18659 | (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x") |
18660 | (match_operand:SI 5 "const1248_operand" "n")] | |
18661 | UNSPEC_VSIBADDR)]) | |
18662 | (mem:BLK (scratch)) | |
18663 | (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")] | |
18664 | UNSPEC_GATHER)) | |
18665 | (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] | |
18666 | "TARGET_AVX2" | |
18667 | "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}" | |
18668 | [(set_attr "type" "ssemov") | |
18669 | (set_attr "prefix" "vex") | |
18670 | (set_attr "mode" "<sseinsnmode>")]) | |
18671 | ||
;; Expander for AVX2 gathers whose VSIB vector register has mode | | |
;; <VEC_GATHER_IDXDI>.  Operands 1 and 4 use mode <VEC_GATHER_SRCDI>.  The | | |
;; preparation statement wraps operands 2, 3 and 5 in a Pmode | | |
;; UNSPEC_VSIBADDR stored as operand 7, the address of the gathered memory. | | |
;; Operand 6 is a scratch in the data mode that the pattern clobbers.  The | | |
;; predicate of operand 5 is spelled without a trailing blank so it matches | | |
;; the name used by the insn patterns. | | |
5deb404d | 18672 | (define_expand "avx2_gatherdi<mode>" |
abd4f58b | 18673 | [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") |
a24c6b3c | 18674 | (unspec:VEC_GATHER_MODE |
abd4f58b | 18675 | [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") |
de084923 | 18676 | (mem:<ssescalarmode> |
18677 | (match_par_dup 7 | |
abd4f58b | 18678 | [(match_operand 2 "vsib_address_operand") |
16dfb112 | 18679 | (match_operand:<VEC_GATHER_IDXDI> |
abd4f58b | 18680 | 3 "register_operand") |
18681 | (match_operand:SI 5 "const1248_operand")])) | |
a24c6b3c | 18682 | (mem:BLK (scratch)) |
7d9c40e2 | 18683 | (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")] |
a24c6b3c | 18684 | UNSPEC_GATHER)) |
abd4f58b | 18685 | (clobber (match_scratch:VEC_GATHER_MODE 6))])] |
de084923 | 18686 | "TARGET_AVX2" |
18687 | { | |
18688 | operands[7] | |
18689 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], | |
18690 | operands[5]), UNSPEC_VSIBADDR); | |
18691 | }) | |
5deb404d | 18692 | |
;; Insn matched for the avx2_gatherdi expansion.  The template prints | | |
;; operands 2 and 5, which are tied to operands 0 and 1 through the "0" and | | |
;; "1" constraints but carry mode <VEC_GATHER_SRCDI>, instead of %0/%1. | | |
;; NOTE(review): presumably this makes the registers print at the | | |
;; <VEC_GATHER_SRCDI> width -- confirm. | | |
18693 | (define_insn "*avx2_gatherdi<mode>" | |
16dfb112 | 18694 | [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") |
18695 | (unspec:VEC_GATHER_MODE | |
18696 | [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0") | |
de084923 | 18697 | (match_operator:<ssescalarmode> 7 "vsib_mem_operator" |
18698 | [(unspec:P | |
1e662e65 | 18699 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
16dfb112 | 18700 | (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") |
de084923 | 18701 | (match_operand:SI 6 "const1248_operand" "n")] |
18702 | UNSPEC_VSIBADDR)]) | |
a24c6b3c | 18703 | (mem:BLK (scratch)) |
16dfb112 | 18704 | (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] |
a24c6b3c | 18705 | UNSPEC_GATHER)) |
16dfb112 | 18706 | (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] |
5deb404d | 18707 | "TARGET_AVX2" |
16dfb112 | 18708 | "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}" |
5deb404d | 18709 | [(set_attr "type" "ssemov") |
18710 | (set_attr "prefix" "vex") | |
18711 | (set_attr "mode" "<sseinsnmode>")]) | |
6296bd96 | 18712 | |
;; Variant of *avx2_gatherdi<mode> with (pc) as the first unspec input. | | |
;; When <MODE> differs from <VEC_GATHER_SRCDI> the destination is printed | | |
;; with the %x modifier in both assembler dialects; otherwise it is printed | | |
;; as plain %0. | | |
18713 | (define_insn "*avx2_gatherdi<mode>_2" | |
18714 | [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") | |
18715 | (unspec:VEC_GATHER_MODE | |
18716 | [(pc) | |
18717 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18718 | [(unspec:P | |
1e662e65 | 18719 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
6296bd96 | 18720 | (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") |
18721 | (match_operand:SI 5 "const1248_operand" "n")] | |
18722 | UNSPEC_VSIBADDR)]) | |
18723 | (mem:BLK (scratch)) | |
18724 | (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] | |
18725 | UNSPEC_GATHER)) | |
18726 | (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] | |
18727 | "TARGET_AVX2" | |
18728 | { | |
18729 | if (<MODE>mode != <VEC_GATHER_SRCDI>mode) | |
18730 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}"; | |
18731 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"; | |
18732 | } | |
18733 | [(set_attr "type" "ssemov") | |
18734 | (set_attr "prefix" "vex") | |
18735 | (set_attr "mode" "<sseinsnmode>")]) | |
2cded00a | 18736 | |
;; VI4F_256 gatherq variant whose result is the vec_select of elements 0..3 | | |
;; of the gather unspec, so the destination has mode <VEC_GATHER_SRCDI>. | | |
;; Operand 2 is tied to the destination and operand 5 to the clobbered | | |
;; scratch operand 1. | | |
18737 | (define_insn "*avx2_gatherdi<mode>_3" | |
18738 | [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x") | |
18739 | (vec_select:<VEC_GATHER_SRCDI> | |
18740 | (unspec:VI4F_256 | |
18741 | [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0") | |
18742 | (match_operator:<ssescalarmode> 7 "vsib_mem_operator" | |
18743 | [(unspec:P | |
1e662e65 | 18744 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
2cded00a | 18745 | (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") |
18746 | (match_operand:SI 6 "const1248_operand" "n")] | |
18747 | UNSPEC_VSIBADDR)]) | |
18748 | (mem:BLK (scratch)) | |
18749 | (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] | |
18750 | UNSPEC_GATHER) | |
18751 | (parallel [(const_int 0) (const_int 1) | |
18752 | (const_int 2) (const_int 3)]))) | |
18753 | (clobber (match_scratch:VI4F_256 1 "=&x"))] | |
18754 | "TARGET_AVX2" | |
18755 | "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}" | |
18756 | [(set_attr "type" "ssemov") | |
18757 | (set_attr "prefix" "vex") | |
18758 | (set_attr "mode" "<sseinsnmode>")]) | |
18759 | ||
;; Same as *avx2_gatherdi<mode>_3 but with (pc) as the first unspec input | | |
;; instead of a register tied to the destination. | | |
18760 | (define_insn "*avx2_gatherdi<mode>_4" | |
18761 | [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x") | |
18762 | (vec_select:<VEC_GATHER_SRCDI> | |
18763 | (unspec:VI4F_256 | |
18764 | [(pc) | |
18765 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18766 | [(unspec:P | |
1e662e65 | 18767 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
2cded00a | 18768 | (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") |
18769 | (match_operand:SI 5 "const1248_operand" "n")] | |
18770 | UNSPEC_VSIBADDR)]) | |
18771 | (mem:BLK (scratch)) | |
18772 | (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] | |
18773 | UNSPEC_GATHER) | |
18774 | (parallel [(const_int 0) (const_int 1) | |
18775 | (const_int 2) (const_int 3)]))) | |
18776 | (clobber (match_scratch:VI4F_256 1 "=&x"))] | |
18777 | "TARGET_AVX2" | |
18778 | "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}" | |
18779 | [(set_attr "type" "ssemov") | |
18780 | (set_attr "prefix" "vex") | |
18781 | (set_attr "mode" "<sseinsnmode>")]) | |
8e6b975f | 18782 | |
;; AVX512F gather expander for VSIB vector registers of mode | | |
;; <VEC_GATHER_IDXSI>.  Operand 4 is a register of mode <avx512fmaskmode> | | |
;; and operand 7 a clobbered scratch of the same mode.  The preparation | | |
;; statement builds the Pmode UNSPEC_VSIBADDR from operands 2, 3 and 5 and | | |
;; stores it as operand 6. | | |
06ea6335 | 18783 | (define_expand "<avx512>_gathersi<mode>" |
18784 | [(parallel [(set (match_operand:VI48F 0 "register_operand") | |
18785 | (unspec:VI48F | |
18786 | [(match_operand:VI48F 1 "register_operand") | |
8e6b975f | 18787 | (match_operand:<avx512fmaskmode> 4 "register_operand") |
18788 | (mem:<ssescalarmode> | |
18789 | (match_par_dup 6 | |
18790 | [(match_operand 2 "vsib_address_operand") | |
18791 | (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand") | |
18792 | (match_operand:SI 5 "const1248_operand")]))] | |
18793 | UNSPEC_GATHER)) | |
18794 | (clobber (match_scratch:<avx512fmaskmode> 7))])] | |
18795 | "TARGET_AVX512F" | |
18796 | { | |
18797 | operands[6] | |
18798 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], | |
18799 | operands[5]), UNSPEC_VSIBADDR); | |
18800 | }) | |
18801 | ||
;; Insn for the AVX512F gatherd form.  Operand 1 is tied to the destination | | |
;; and the mask input (operand 7) is tied to the clobbered "=&Yk" scratch | | |
;; operand 2, which is the mask register printed in the template. | | |
18802 | (define_insn "*avx512f_gathersi<mode>" | |
06ea6335 | 18803 | [(set (match_operand:VI48F 0 "register_operand" "=&v") |
18804 | (unspec:VI48F | |
18805 | [(match_operand:VI48F 1 "register_operand" "0") | |
8e6b975f | 18806 | (match_operand:<avx512fmaskmode> 7 "register_operand" "2") |
18807 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18808 | [(unspec:P | |
1e662e65 | 18809 | [(match_operand:P 4 "vsib_address_operand" "Tv") |
8e6b975f | 18810 | (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v") |
18811 | (match_operand:SI 5 "const1248_operand" "n")] | |
18812 | UNSPEC_VSIBADDR)])] | |
18813 | UNSPEC_GATHER)) | |
a31e7f46 | 18814 | (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))] |
8e6b975f | 18815 | "TARGET_AVX512F" |
18816 | "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}" | |
18817 | [(set_attr "type" "ssemov") | |
18818 | (set_attr "prefix" "evex") | |
18819 | (set_attr "mode" "<sseinsnmode>")]) | |
18820 | ||
;; Variant of *avx512f_gathersi<mode> with (pc) as the first unspec input; | | |
;; the mask input (operand 6) is tied to the clobbered scratch operand 1. | | |
18821 | (define_insn "*avx512f_gathersi<mode>_2" | |
06ea6335 | 18822 | [(set (match_operand:VI48F 0 "register_operand" "=&v") |
18823 | (unspec:VI48F | |
8e6b975f | 18824 | [(pc) |
18825 | (match_operand:<avx512fmaskmode> 6 "register_operand" "1") | |
18826 | (match_operator:<ssescalarmode> 5 "vsib_mem_operator" | |
18827 | [(unspec:P | |
1e662e65 | 18828 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
8e6b975f | 18829 | (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v") |
18830 | (match_operand:SI 4 "const1248_operand" "n")] | |
18831 | UNSPEC_VSIBADDR)])] | |
18832 | UNSPEC_GATHER)) | |
a31e7f46 | 18833 | (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] |
8e6b975f | 18834 | "TARGET_AVX512F" |
18835 | "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}" | |
18836 | [(set_attr "type" "ssemov") | |
18837 | (set_attr "prefix" "evex") | |
18838 | (set_attr "mode" "<sseinsnmode>")]) | |
18839 | ||
18840 | ||
;; AVX512F gather expander for VSIB vector registers of mode | | |
;; <VEC_GATHER_IDXDI>.  Operand 1 has mode <VEC_GATHER_SRCDI>; operand 4 and | | |
;; the clobbered scratch operand 7 are QImode.  The preparation statement | | |
;; builds the Pmode UNSPEC_VSIBADDR from operands 2, 3 and 5 as operand 6. | | |
06ea6335 | 18841 | (define_expand "<avx512>_gatherdi<mode>" |
18842 | [(parallel [(set (match_operand:VI48F 0 "register_operand") | |
18843 | (unspec:VI48F | |
8e6b975f | 18844 | [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") |
18845 | (match_operand:QI 4 "register_operand") | |
18846 | (mem:<ssescalarmode> | |
18847 | (match_par_dup 6 | |
18848 | [(match_operand 2 "vsib_address_operand") | |
18849 | (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand") | |
18850 | (match_operand:SI 5 "const1248_operand")]))] | |
18851 | UNSPEC_GATHER)) | |
18852 | (clobber (match_scratch:QI 7))])] | |
18853 | "TARGET_AVX512F" | |
18854 | { | |
18855 | operands[6] | |
18856 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], | |
18857 | operands[5]), UNSPEC_VSIBADDR); | |
18858 | }) | |
18859 | ||
;; Insn for the AVX512F gatherq form.  The template prints operand 1, which | | |
;; is tied to the destination by constraint "0" but has mode | | |
;; <VEC_GATHER_SRCDI>; the QImode mask input (operand 7) is tied to the | | |
;; clobbered "=&Yk" scratch operand 2. | | |
18860 | (define_insn "*avx512f_gatherdi<mode>" | |
06ea6335 | 18861 | [(set (match_operand:VI48F 0 "register_operand" "=&v") |
18862 | (unspec:VI48F | |
8e6b975f | 18863 | [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0") |
18864 | (match_operand:QI 7 "register_operand" "2") | |
18865 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18866 | [(unspec:P | |
1e662e65 | 18867 | [(match_operand:P 4 "vsib_address_operand" "Tv") |
8e6b975f | 18868 | (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v") |
18869 | (match_operand:SI 5 "const1248_operand" "n")] | |
18870 | UNSPEC_VSIBADDR)])] | |
18871 | UNSPEC_GATHER)) | |
a31e7f46 | 18872 | (clobber (match_scratch:QI 2 "=&Yk"))] |
8e6b975f | 18873 | "TARGET_AVX512F" |
18874 | "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}" | |
18875 | [(set_attr "type" "ssemov") | |
18876 | (set_attr "prefix" "evex") | |
18877 | (set_attr "mode" "<sseinsnmode>")]) | |
18878 | ||
;; Variant of *avx512f_gatherdi<mode> with (pc) as the first unspec input. | | |
;; When <MODE> differs from <VEC_GATHER_SRCDI> the destination is printed | | |
;; with an explicit size modifier: %x0 when <MODE_SIZE> is not 64 and %t0 | | |
;; when it is 64.  Both assembler dialects of one template must name the | | |
;; same register, so the Intel half of the non-64 case uses %x0 like the | | |
;; AT&T half (it previously printed %t0). | | |
18879 | (define_insn "*avx512f_gatherdi<mode>_2" | |
06ea6335 | 18880 | [(set (match_operand:VI48F 0 "register_operand" "=&v") |
18881 | (unspec:VI48F | |
8e6b975f | 18882 | [(pc) |
18883 | (match_operand:QI 6 "register_operand" "1") | |
18884 | (match_operator:<ssescalarmode> 5 "vsib_mem_operator" | |
18885 | [(unspec:P | |
1e662e65 | 18886 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
8e6b975f | 18887 | (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v") |
18888 | (match_operand:SI 4 "const1248_operand" "n")] | |
18889 | UNSPEC_VSIBADDR)])] | |
18890 | UNSPEC_GATHER)) | |
a31e7f46 | 18891 | (clobber (match_scratch:QI 1 "=&Yk"))] |
8e6b975f | 18892 | "TARGET_AVX512F" |
18893 | { | |
18894 | if (<MODE>mode != <VEC_GATHER_SRCDI>mode) | |
06ea6335 | 18895 | { |
996b47b0 | 18896 | if (<MODE_SIZE> != 64) |
06ea6335 | 18897 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}"; |
18898 | else | |
18899 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}"; | |
18900 | } | |
8e6b975f | 18901 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"; |
18902 | } | |
18903 | [(set_attr "type" "ssemov") | |
18904 | (set_attr "prefix" "evex") | |
18905 | (set_attr "mode" "<sseinsnmode>")]) | |
18906 | ||
;; AVX512F scatter expander for VSIB vector registers of mode | | |
;; <VEC_GATHER_IDXSI>.  The destination is a memory whose address is | | |
;; operand 5, the Pmode UNSPEC_VSIBADDR that the preparation statement | | |
;; builds from operands 0, 2 and 4.  Operand 1 is a register of mode | | |
;; <avx512fmaskmode>, operand 3 the data register, and operand 6 a | | |
;; clobbered scratch in the mask mode. | | |
06ea6335 | 18907 | (define_expand "<avx512>_scattersi<mode>" |
18908 | [(parallel [(set (mem:VI48F | |
8e6b975f | 18909 | (match_par_dup 5 |
18910 | [(match_operand 0 "vsib_address_operand") | |
18911 | (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand") | |
18912 | (match_operand:SI 4 "const1248_operand")])) | |
06ea6335 | 18913 | (unspec:VI48F |
8e6b975f | 18914 | [(match_operand:<avx512fmaskmode> 1 "register_operand") |
06ea6335 | 18915 | (match_operand:VI48F 3 "register_operand")] |
8e6b975f | 18916 | UNSPEC_SCATTER)) |
18917 | (clobber (match_scratch:<avx512fmaskmode> 6))])] | |
18918 | "TARGET_AVX512F" | |
18919 | { | |
18920 | operands[5] | |
18921 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2], | |
18922 | operands[4]), UNSPEC_VSIBADDR); | |
18923 | }) | |
18924 | ||
;; Insn for the AVX512F scatterd form.  The mask input (operand 6) is tied | | |
;; to the clobbered "=&Yk" scratch operand 1, which is the mask register | | |
;; printed next to the VSIB memory operand 5; operand 3 is the data source. | | |
18925 | (define_insn "*avx512f_scattersi<mode>" | |
06ea6335 | 18926 | [(set (match_operator:VI48F 5 "vsib_mem_operator" |
8e6b975f | 18927 | [(unspec:P |
1e662e65 | 18928 | [(match_operand:P 0 "vsib_address_operand" "Tv") |
8e6b975f | 18929 | (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v") |
18930 | (match_operand:SI 4 "const1248_operand" "n")] | |
18931 | UNSPEC_VSIBADDR)]) | |
06ea6335 | 18932 | (unspec:VI48F |
8e6b975f | 18933 | [(match_operand:<avx512fmaskmode> 6 "register_operand" "1") |
06ea6335 | 18934 | (match_operand:VI48F 3 "register_operand" "v")] |
8e6b975f | 18935 | UNSPEC_SCATTER)) |
a31e7f46 | 18936 | (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] |
8e6b975f | 18937 | "TARGET_AVX512F" |
18938 | "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}" | |
18939 | [(set_attr "type" "ssemov") | |
18940 | (set_attr "prefix" "evex") | |
18941 | (set_attr "mode" "<sseinsnmode>")]) | |
18942 | ||
;; AVX512F scatter expander for VSIB vector registers of mode | | |
;; <VEC_GATHER_IDXDI>.  Operand 1 and the clobbered scratch operand 6 are | | |
;; QImode; the data operand 3 has mode <VEC_GATHER_SRCDI>.  The preparation | | |
;; statement builds the Pmode UNSPEC_VSIBADDR from operands 0, 2 and 4 and | | |
;; stores it as operand 5, the address of the destination memory. | | |
06ea6335 | 18943 | (define_expand "<avx512>_scatterdi<mode>" |
18944 | [(parallel [(set (mem:VI48F | |
8e6b975f | 18945 | (match_par_dup 5 |
18946 | [(match_operand 0 "vsib_address_operand") | |
06ea6335 | 18947 | (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand") |
8e6b975f | 18948 | (match_operand:SI 4 "const1248_operand")])) |
06ea6335 | 18949 | (unspec:VI48F |
8e6b975f | 18950 | [(match_operand:QI 1 "register_operand") |
18951 | (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")] | |
18952 | UNSPEC_SCATTER)) | |
18953 | (clobber (match_scratch:QI 6))])] | |
18954 | "TARGET_AVX512F" | |
18955 | { | |
18956 | operands[5] | |
18957 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2], | |
18958 | operands[4]), UNSPEC_VSIBADDR); | |
18959 | }) | |
18960 | ||
;; Insn for the AVX512F scatterq form.  The QImode mask input (operand 6) is | | |
;; tied to the clobbered "=&Yk" scratch operand 1; the data source operand 3 | | |
;; has mode <VEC_GATHER_SRCDI>. | | |
18961 | (define_insn "*avx512f_scatterdi<mode>" | |
06ea6335 | 18962 | [(set (match_operator:VI48F 5 "vsib_mem_operator" |
8e6b975f | 18963 | [(unspec:P |
1e662e65 | 18964 | [(match_operand:P 0 "vsib_address_operand" "Tv") |
06ea6335 | 18965 | (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v") |
8e6b975f | 18966 | (match_operand:SI 4 "const1248_operand" "n")] |
18967 | UNSPEC_VSIBADDR)]) | |
06ea6335 | 18968 | (unspec:VI48F |
8e6b975f | 18969 | [(match_operand:QI 6 "register_operand" "1") |
18970 | (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")] | |
18971 | UNSPEC_SCATTER)) | |
a31e7f46 | 18972 | (clobber (match_scratch:QI 1 "=&Yk"))] |
8e6b975f | 18973 | "TARGET_AVX512F" |
18974 | "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}" | |
18975 | [(set_attr "type" "ssemov") | |
18976 | (set_attr "prefix" "evex") | |
18977 | (set_attr "mode" "<sseinsnmode>")]) | |
d2ff59d6 | 18978 | |
;; Register form of the masked compress instruction.  Operand 2 is either | | |
;; tied to the destination or a constant accepted by constraint "C" | | |
;; (alternatives "0C"); operand 3 is the "Yk" mask register printed as | | |
;; %{%3%} with the %N2 suffix. | | |
6ce48b02 | 18979 | (define_insn "<avx512>_compress<mode>_mask" |
18980 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
18981 | (unspec:VI48F | |
18982 | [(match_operand:VI48F 1 "register_operand" "v") | |
18983 | (match_operand:VI48F 2 "vector_move_operand" "0C") | |
a31e7f46 | 18984 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")] |
5220cab6 | 18985 | UNSPEC_COMPRESS))] |
18986 | "TARGET_AVX512F" | |
18987 | "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
18988 | [(set_attr "type" "ssemov") | |
18989 | (set_attr "prefix" "evex") | |
18990 | (set_attr "mode" "<sseinsnmode>")]) | |
18991 | ||
;; Memory-destination form of the masked compress instruction.  The previous | | |
;; memory contents take part in the unspec through (match_dup 0); operand 2 | | |
;; is the "Yk" mask register.  The "memory" attribute is set to "store". | | |
6ce48b02 | 18992 | (define_insn "<avx512>_compressstore<mode>_mask" |
18993 | [(set (match_operand:VI48F 0 "memory_operand" "=m") | |
18994 | (unspec:VI48F | |
18995 | [(match_operand:VI48F 1 "register_operand" "x") | |
5220cab6 | 18996 | (match_dup 0) |
a31e7f46 | 18997 | (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")] |
5220cab6 | 18998 | UNSPEC_COMPRESS_STORE))] |
18999 | "TARGET_AVX512F" | |
19000 | "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
19001 | [(set_attr "type" "ssemov") | |
19002 | (set_attr "prefix" "evex") | |
19003 | (set_attr "memory" "store") | |
19004 | (set_attr "mode" "<sseinsnmode>")]) | |
19005 | ||
;; Expander for the masked expand operation with a zero second input: the | | |
;; preparation statement overwrites operand 2 with the zero vector of | | |
;; <MODE>mode before the UNSPEC_EXPAND pattern is generated. | | |
6ce48b02 | 19006 | (define_expand "<avx512>_expand<mode>_maskz" |
19007 | [(set (match_operand:VI48F 0 "register_operand") | |
19008 | (unspec:VI48F | |
19009 | [(match_operand:VI48F 1 "nonimmediate_operand") | |
19010 | (match_operand:VI48F 2 "vector_move_operand") | |
9a5ea1d5 | 19011 | (match_operand:<avx512fmaskmode> 3 "register_operand")] |
19012 | UNSPEC_EXPAND))] | |
19013 | "TARGET_AVX512F" | |
19014 | "operands[2] = CONST0_RTX (<MODE>mode);") | |
19015 | ||
;; Masked expand insn with two alternatives: the source operand 1 is a | | |
;; register ("v") or memory ("m"), reflected in the "memory" attribute | | |
;; ("none,load").  Operand 2 is tied to the destination or a "C" constant; | | |
;; operand 3 is the "Yk" mask register. | | |
6ce48b02 | 19016 | (define_insn "<avx512>_expand<mode>_mask" |
19017 | [(set (match_operand:VI48F 0 "register_operand" "=v,v") | |
19018 | (unspec:VI48F | |
19019 | [(match_operand:VI48F 1 "nonimmediate_operand" "v,m") | |
19020 | (match_operand:VI48F 2 "vector_move_operand" "0C,0C") | |
a31e7f46 | 19021 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")] |
5220cab6 | 19022 | UNSPEC_EXPAND))] |
19023 | "TARGET_AVX512F" | |
19024 | "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
19025 | [(set_attr "type" "ssemov") | |
19026 | (set_attr "prefix" "evex") | |
19027 | (set_attr "memory" "none,load") | |
19028 | (set_attr "mode" "<sseinsnmode>")]) | |
19029 | ||
;; Packed vrange (condition TARGET_AVX512DQ plus | | |
;; <round_saeonly_mode512bit_condition>): operands 1 and 2 are the vector | | |
;; inputs and operand 3 is a 0..15 immediate. | | |
6164575a | 19030 | (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>" |
19031 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
19032 | (unspec:VF_AVX512VL | |
19033 | [(match_operand:VF_AVX512VL 1 "register_operand" "v") | |
19034 | (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") | |
19035 | (match_operand:SI 3 "const_0_to_15_operand")] | |
19036 | UNSPEC_RANGE))] | |
19037 | "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>" | |
88048095 | 19038 | "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}" |
6164575a | 19039 | [(set_attr "type" "sse") |
19040 | (set_attr "prefix" "evex") | |
19041 | (set_attr "mode" "<MODE>")]) | |
19042 | ||
;; Scalar vrange on VF_128: the vec_merge with (const_int 1) takes element 0 | | |
;; from the UNSPEC_RANGE result and the remaining elements from operand 1. | | |
19043 | (define_insn "avx512dq_ranges<mode><round_saeonly_name>" | |
19044 | [(set (match_operand:VF_128 0 "register_operand" "=v") | |
19045 | (vec_merge:VF_128 | |
19046 | (unspec:VF_128 | |
19047 | [(match_operand:VF_128 1 "register_operand" "v") | |
19048 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") | |
19049 | (match_operand:SI 3 "const_0_to_15_operand")] | |
19050 | UNSPEC_RANGE) | |
19051 | (match_dup 1) | |
19052 | (const_int 1)))] | |
19053 | "TARGET_AVX512DQ" | |
88048095 | 19054 | "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}" |
6164575a | 19055 | [(set_attr "type" "sse") |
19056 | (set_attr "prefix" "evex") | |
19057 | (set_attr "mode" "<MODE>")]) | |
19058 | ||
;; Packed vfpclass: produces a mask-register ("=Yk") result of mode | | |
;; <avx512fmaskmode> from the vector operand 1 and the 0..255 immediate | | |
;; operand 2.  The output template carries no trailing ';' (in this file | | |
;; ';' only starts a comment), matching the other patterns. | | |
19059 | (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>" | |
19060 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
19061 | (unspec:<avx512fmaskmode> | |
19062 | [(match_operand:VF_AVX512VL 1 "register_operand" "v") | |
19063 | (match_operand:QI 2 "const_0_to_255_operand" "n")] | |
19064 | UNSPEC_FPCLASS))] | |
19065 | "TARGET_AVX512DQ" | |
19066 | "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
19067 | [(set_attr "type" "sse") | |
19068 | (set_attr "length_immediate" "1") | |
19069 | (set_attr "prefix" "evex") | |
19070 | (set_attr "mode" "<MODE>")]) | |
19071 | ||
;; Scalar vfpclass on VF_128: the UNSPEC_FPCLASS mask result is ANDed with | | |
;; (const_int 1), so only bit 0 of the mask is kept.  The output template | | |
;; carries no trailing ';' (in this file ';' only starts a comment), | | |
;; matching the other patterns. | | |
19072 | (define_insn "avx512dq_vmfpclass<mode>" | |
19073 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
19074 | (and:<avx512fmaskmode> | |
19075 | (unspec:<avx512fmaskmode> | |
19076 | [(match_operand:VF_128 1 "register_operand" "v") | |
19077 | (match_operand:QI 2 "const_0_to_255_operand" "n")] | |
19078 | UNSPEC_FPCLASS) | |
19079 | (const_int 1)))] | |
19080 | "TARGET_AVX512DQ" | |
19081 | "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
19082 | [(set_attr "type" "sse") | |
19083 | (set_attr "length_immediate" "1") | |
19084 | (set_attr "prefix" "evex") | |
19085 | (set_attr "mode" "<MODE>")]) | |
19086 | ||
;; Packed vgetmant: operand 1 is the vector source and operand 2 a 0..15 | | |
;; immediate.  The output template carries no trailing ';' (in this file | | |
;; ';' only starts a comment), matching the other patterns. | | |
250533c0 | 19087 | (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>" |
19088 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
19089 | (unspec:VF_AVX512VL | |
19090 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>") | |
85065932 | 19091 | (match_operand:SI 2 "const_0_to_15_operand")] |
19092 | UNSPEC_GETMANT))] | |
19093 | "TARGET_AVX512F" | |
dbfe84d5 | 19094 | "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}" |
85065932 | 19095 | [(set_attr "prefix" "evex") |
19096 | (set_attr "mode" "<MODE>")]) | |
19097 | ||
;; Scalar vgetmant on VF_128: the vec_merge with (const_int 1) takes element | | |
;; 0 from the UNSPEC_GETMANT result and the remaining elements from | | |
;; operand 1.  The output template carries no trailing ';' (in this file | | |
;; ';' only starts a comment), matching the other patterns. | | |
250533c0 | 19098 | (define_insn "avx512f_vgetmant<mode><round_saeonly_name>" |
85065932 | 19099 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
19100 | (vec_merge:VF_128 | |
19101 | (unspec:VF_128 | |
19102 | [(match_operand:VF_128 1 "register_operand" "v") | |
fbf4df62 | 19103 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
85065932 | 19104 | (match_operand:SI 3 "const_0_to_15_operand")] |
19105 | UNSPEC_GETMANT) | |
19106 | (match_dup 1) | |
19107 | (const_int 1)))] | |
19108 | "TARGET_AVX512F" | |
0b7cc9c6 | 19109 | "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}" |
85065932 | 19110 | [(set_attr "prefix" "evex") |
19111 | (set_attr "mode" "<ssescalarmode>")]) | |
19112 | ||
d58134c2 | 19113 | ;; The correct representation for this is absolutely enormous, and |
19114 | ;; surely not generally useful. | |
;; The operation is therefore kept as an opaque UNSPEC_DBPSADBW: operands 1 | | |
;; and 2 have mode <dbpsadbwmode>, operand 3 is a 0..255 immediate, and the | | |
;; result has the VI2_AVX512VL mode. | | |
19115 | (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>" | |
19116 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
19117 | (unspec:VI2_AVX512VL | |
19118 | [(match_operand:<dbpsadbwmode> 1 "register_operand" "v") | |
19119 | (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm") | |
19120 | (match_operand:SI 3 "const_0_to_255_operand")] | |
19121 | UNSPEC_DBPSADBW))] | |
19122 | "TARGET_AVX512BW" | |
19123 | "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}" | |
19124 | [(set_attr "isa" "avx") | |
19125 | (set_attr "type" "sselog1") | |
19126 | (set_attr "length_immediate" "1") | |
19127 | (set_attr "prefix" "evex") | |
19128 | (set_attr "mode" "<sseinsnmode>")]) | |
19129 | ||
;; Vector count-leading-zeros, expressed with the generic clz RTL code and | | |
;; emitted as vplzcnt (condition TARGET_AVX512CD). | | |
5220cab6 | 19130 | (define_insn "clz<mode>2<mask_name>" |
3d038641 | 19131 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") |
19132 | (clz:VI48_AVX512VL | |
19133 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))] | |
d2ff59d6 | 19134 | "TARGET_AVX512CD" |
5220cab6 | 19135 | "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
d2ff59d6 | 19136 | [(set_attr "type" "sse") |
19137 | (set_attr "prefix" "evex") | |
19138 | (set_attr "mode" "<sseinsnmode>")]) | |
19139 | ||
;; vpconflict, represented as an opaque UNSPEC_CONFLICT of the single | | |
;; register-or-memory source operand (condition TARGET_AVX512CD). | | |
5220cab6 | 19140 | (define_insn "<mask_codefor>conflict<mode><mask_name>" |
3d038641 | 19141 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") |
19142 | (unspec:VI48_AVX512VL | |
19143 | [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")] | |
d2ff59d6 | 19144 | UNSPEC_CONFLICT))] |
19145 | "TARGET_AVX512CD" | |
5220cab6 | 19146 | "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
d2ff59d6 | 19147 | [(set_attr "type" "sse") |
19148 | (set_attr "prefix" "evex") | |
19149 | (set_attr "mode" "<sseinsnmode>")]) | |
fc975a40 | 19150 | |
;; sha1msg1 (condition TARGET_SHA): operand 1 is tied to the destination | | |
;; (constraint "0"); operand 2 may be a register or memory. | | |
19151 | (define_insn "sha1msg1" | |
19152 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
19153 | (unspec:V4SI | |
19154 | [(match_operand:V4SI 1 "register_operand" "0") | |
19155 | (match_operand:V4SI 2 "nonimmediate_operand" "xm")] | |
19156 | UNSPEC_SHA1MSG1))] | |
19157 | "TARGET_SHA" | |
19158 | "sha1msg1\t{%2, %0|%0, %2}" | |
19159 | [(set_attr "type" "sselog1") | |
19160 | (set_attr "mode" "TI")]) | |
19161 | ||
;; sha1msg2 (condition TARGET_SHA): operand 1 is tied to the destination | | |
;; (constraint "0"); operand 2 may be a register or memory. | | |
19162 | (define_insn "sha1msg2" | |
19163 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
19164 | (unspec:V4SI | |
19165 | [(match_operand:V4SI 1 "register_operand" "0") | |
19166 | (match_operand:V4SI 2 "nonimmediate_operand" "xm")] | |
19167 | UNSPEC_SHA1MSG2))] | |
19168 | "TARGET_SHA" | |
19169 | "sha1msg2\t{%2, %0|%0, %2}" | |
19170 | [(set_attr "type" "sselog1") | |
19171 | (set_attr "mode" "TI")]) | |
19172 | ||
;; sha1nexte (condition TARGET_SHA): operand 1 is tied to the destination | | |
;; (constraint "0"); operand 2 may be a register or memory. | | |
19173 | (define_insn "sha1nexte" | |
19174 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
19175 | (unspec:V4SI | |
19176 | [(match_operand:V4SI 1 "register_operand" "0") | |
19177 | (match_operand:V4SI 2 "nonimmediate_operand" "xm")] | |
19178 | UNSPEC_SHA1NEXTE))] | |
19179 | "TARGET_SHA" | |
19180 | "sha1nexte\t{%2, %0|%0, %2}" | |
19181 | [(set_attr "type" "sselog1") | |
19182 | (set_attr "mode" "TI")]) | |
19183 | ||
;; sha1rnds4 (condition TARGET_SHA): operand 1 is tied to the destination, | | |
;; operand 2 may be a register or memory, and operand 3 is a 0..3 | | |
;; immediate, hence length_immediate is 1. | | |
19184 | (define_insn "sha1rnds4" | |
19185 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
19186 | (unspec:V4SI | |
19187 | [(match_operand:V4SI 1 "register_operand" "0") | |
19188 | (match_operand:V4SI 2 "nonimmediate_operand" "xm") | |
19189 | (match_operand:SI 3 "const_0_to_3_operand" "n")] | |
19190 | UNSPEC_SHA1RNDS4))] | |
19191 | "TARGET_SHA" | |
19192 | "sha1rnds4\t{%3, %2, %0|%0, %2, %3}" | |
19193 | [(set_attr "type" "sselog1") | |
19194 | (set_attr "length_immediate" "1") | |
19195 | (set_attr "mode" "TI")]) | |
19196 | ||
;; sha256msg1 (condition TARGET_SHA): operand 1 is tied to the destination | | |
;; (constraint "0"); operand 2 may be a register or memory. | | |
19197 | (define_insn "sha256msg1" | |
19198 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
19199 | (unspec:V4SI | |
19200 | [(match_operand:V4SI 1 "register_operand" "0") | |
19201 | (match_operand:V4SI 2 "nonimmediate_operand" "xm")] | |
19202 | UNSPEC_SHA256MSG1))] | |
19203 | "TARGET_SHA" | |
19204 | "sha256msg1\t{%2, %0|%0, %2}" | |
19205 | [(set_attr "type" "sselog1") | |
19206 | (set_attr "mode" "TI")]) | |
19207 | ||
;; sha256msg2 (condition TARGET_SHA): operand 1 is tied to the destination | | |
;; (constraint "0"); operand 2 may be a register or memory. | | |
19208 | (define_insn "sha256msg2" | |
19209 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
19210 | (unspec:V4SI | |
19211 | [(match_operand:V4SI 1 "register_operand" "0") | |
19212 | (match_operand:V4SI 2 "nonimmediate_operand" "xm")] | |
19213 | UNSPEC_SHA256MSG2))] | |
19214 | "TARGET_SHA" | |
19215 | "sha256msg2\t{%2, %0|%0, %2}" | |
19216 | [(set_attr "type" "sselog1") | |
19217 | (set_attr "mode" "TI")]) | |
19218 | ||
;; Instruction pattern for "sha256rnds2", enabled under TARGET_SHA.
;; Operand 0 (V4SI register) is tied to operand 1 via constraint "0";
;; operand 2 is a V4SI register or memory source.
;; Operand 3 is a third V4SI *register* input restricted by constraint
;; "Yz" (the instruction's implicit xmm0 operand); it is printed
;; explicitly in the output template.
;; The result is modelled as an UNSPEC_SHA256RNDS2 of operands 1-3.
;;
;; Unlike sha1rnds4 this instruction carries no immediate byte, so it
;; must not claim "length_immediate".  The byte beyond the plain 0F
;; opcode is the 0F 38 escape, which the i386 length computation
;; accounts for through "prefix_extra"; the total computed length is
;; unchanged, only its attribution is corrected.
19219 | (define_insn "sha256rnds2" | |
19220 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
19221 | (unspec:V4SI | |
19222 | [(match_operand:V4SI 1 "register_operand" "0") | |
19223 | (match_operand:V4SI 2 "nonimmediate_operand" "xm") | |
19224 | (match_operand:V4SI 3 "register_operand" "Yz")] | |
19225 | UNSPEC_SHA256RNDS2))] | |
19226 | "TARGET_SHA" | |
19227 | "sha256rnds2\t{%3, %2, %0|%0, %2, %3}" | |
19228 | [(set_attr "type" "sselog1") | |
19229 | (set_attr "prefix_extra" "1") | |
19230 | (set_attr "mode" "TI")]) | |
889d21f6 | 19231 | |
;; Cast from the quarter-width vector mode (<ssequartermode>) to a full
;; AVX512MODE2P vector, enabled under TARGET_AVX512F.  The cast is an
;; UNSPEC_CAST placeholder with no assembler output of its own ("#"):
;; once reload has completed it is split into a single move.
;;   - Register destination: the destination hard register is re-viewed
;;     in the quarter-width mode and the source is moved into it as is.
;;   - Otherwise (memory destination, so the source alternative is a
;;     register): the source hard register is re-viewed in the full
;;     <MODE> and moved to the destination.
19232 | (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>" | |
19233 | [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m") | |
19234 | (unspec:AVX512MODE2P | |
19235 | [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")] | |
19236 | UNSPEC_CAST))] | |
19237 | "TARGET_AVX512F" | |
19238 | "#" | |
19239 | "&& reload_completed" | |
19240 | [(const_int 0)] | |
19241 | { | |
19242 | rtx dst = operands[0]; | |
19243 | rtx src = operands[1]; | |
19244 | if (!REG_P (dst)) | |
19245 | src = gen_rtx_REG (<MODE>mode, REGNO (src)); | |
19246 | else | |
19247 | dst = gen_rtx_REG (<ssequartermode>mode, REGNO (dst)); | |
19248 | emit_move_insn (dst, src); | |
19249 | DONE; | |
19250 | }) | |
19251 | ||
;; Cast from the half-width vector mode (<ssehalfvecmode>) to a full
;; AVX512MODE2P vector, enabled under TARGET_AVX512F.  Like the
;; quarter-width cast, this is an UNSPEC_CAST placeholder ("#") that is
;; split into a single move once reload has completed.
;;   - Register destination: the destination hard register is re-viewed
;;     in the half-width mode and the source is moved into it as is.
;;   - Otherwise (memory destination, so the source alternative is a
;;     register): the source hard register is re-viewed in the full
;;     <MODE> and moved to the destination.
19252 | (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>" | |
19253 | [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m") | |
19254 | (unspec:AVX512MODE2P | |
19255 | [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")] | |
19256 | UNSPEC_CAST))] | |
19257 | "TARGET_AVX512F" | |
19258 | "#" | |
19259 | "&& reload_completed" | |
19260 | [(const_int 0)] | |
19261 | { | |
19262 | rtx dst = operands[0]; | |
19263 | rtx src = operands[1]; | |
19264 | if (!REG_P (dst)) | |
19265 | src = gen_rtx_REG (<MODE>mode, REGNO (src)); | |
19266 | else | |
19267 | dst = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (dst)); | |
19268 | emit_move_insn (dst, src); | |
19269 | DONE; | |
19270 | }) | |
8a12b665 | 19271 | |
;; Int iterator over the two VPMADD52 unspec codes, so that a single
;; pattern written with VPMADD52 is instantiated once for
;; UNSPEC_VPMADD52LUQ and once for UNSPEC_VPMADD52HUQ.
19272 | (define_int_iterator VPMADD52 | |
19273 | [UNSPEC_VPMADD52LUQ | |
19274 | UNSPEC_VPMADD52HUQ]) | |
19275 | ||
;; Maps each VPMADD52 unspec code to its name suffix ("luq" or "huq"),
;; substituted wherever <vpmadd52type> appears in a pattern name or
;; output template.
19276 | (define_int_attr vpmadd52type | |
19277 | [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")]) | |
19278 | ||
;; Expander for the "huq" variant with a mask operand, enabled under
;; TARGET_AVX512IFMA.  Operands 0-3 are VI8_AVX512VL vectors (operand 3
;; may be memory) and operand 4 is a mask register in
;; <avx512fmaskmode>.  The expander only forwards to the "_maskz_1"
;; insn, inserting an all-zero vector of <MODE> ahead of the mask
;; (presumably the value used for masked-off elements -- confirm
;; against the sd_maskz subst definition).
;; NOTE(review): the name spells "vpamdd52" rather than "vpmadd52";
;; it is kept because the generated gen_* names depend on it.
19279 | (define_expand "vpamdd52huq<mode>_maskz" | |
19280 | [(match_operand:VI8_AVX512VL 0 "register_operand") | |
19281 | (match_operand:VI8_AVX512VL 1 "register_operand") | |
19282 | (match_operand:VI8_AVX512VL 2 "register_operand") | |
19283 | (match_operand:VI8_AVX512VL 3 "nonimmediate_operand") | |
19284 | (match_operand:<avx512fmaskmode> 4 "register_operand")] | |
19285 | "TARGET_AVX512IFMA" | |
19286 | { | |
19287 | rtx zero = CONST0_RTX (<MODE>mode); | |
19288 | emit_insn (gen_vpamdd52huq<mode>_maskz_1 (operands[0], operands[1], | |
19289 | operands[2], operands[3], zero, operands[4])); | |
19290 | DONE; | |
19291 | }) | |
19292 | ||
;; Expander for the "luq" variant with a mask operand, enabled under
;; TARGET_AVX512IFMA.  Operands 0-3 are VI8_AVX512VL vectors (operand 3
;; may be memory) and operand 4 is a mask register in
;; <avx512fmaskmode>.  The expander only forwards to the "_maskz_1"
;; insn, inserting an all-zero vector of <MODE> ahead of the mask
;; (presumably the value used for masked-off elements -- confirm
;; against the sd_maskz subst definition).
;; NOTE(review): the name spells "vpamdd52" rather than "vpmadd52";
;; it is kept because the generated gen_* names depend on it.
19293 | (define_expand "vpamdd52luq<mode>_maskz" | |
19294 | [(match_operand:VI8_AVX512VL 0 "register_operand") | |
19295 | (match_operand:VI8_AVX512VL 1 "register_operand") | |
19296 | (match_operand:VI8_AVX512VL 2 "register_operand") | |
19297 | (match_operand:VI8_AVX512VL 3 "nonimmediate_operand") | |
19298 | (match_operand:<avx512fmaskmode> 4 "register_operand")] | |
19299 | "TARGET_AVX512IFMA" | |
19300 | { | |
19301 | rtx zero = CONST0_RTX (<MODE>mode); | |
19302 | emit_insn (gen_vpamdd52luq<mode>_maskz_1 (operands[0], operands[1], | |
19303 | operands[2], operands[3], zero, operands[4])); | |
19304 | DONE; | |
19305 | }) | |
19306 | ||
;; Instruction pattern emitting vpmadd52luq / vpmadd52huq (selected by
;; the VPMADD52 iterator through <vpmadd52type>), enabled under
;; TARGET_AVX512IFMA.
;; Operand 0 (VI8_AVX512VL register) is tied to operand 1 via
;; constraint "0"; operand 2 is a register and operand 3 a register or
;; memory source.  The result is modelled as an unspec of operands 1-3.
;; <sd_maskz_name> and <sd_mask_op4> come from a subst defined outside
;; this section; presumably they produce the "_maskz_1" variants that
;; the maskz expanders call -- confirm against the subst definition.
;; NOTE(review): the pattern name spells "vpamdd52" while the emitted
;; mnemonic is "vpmadd52"; the name feeds the generated gen_* functions,
;; so renaming it would require updating every user.
19307 | (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>" | |
19308 | [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") | |
19309 | (unspec:VI8_AVX512VL | |
19310 | [(match_operand:VI8_AVX512VL 1 "register_operand" "0") | |
19311 | (match_operand:VI8_AVX512VL 2 "register_operand" "v") | |
19312 | (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] | |
19313 | VPMADD52))] | |
19314 | "TARGET_AVX512IFMA" | |
19315 | "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}" | |
19316 | [(set_attr "type" "ssemuladd") | |
19317 | (set_attr "prefix" "evex") | |
19318 | (set_attr "mode" "<sseinsnmode>")]) | |
19319 | ||
;; Merge-masked form of vpmadd52luq / vpmadd52huq, enabled under
;; TARGET_AVX512IFMA.
;; The unspec result of operands 1-3 is combined by vec_merge with
;; operand 1 itself (match_dup 1) under mask operand 4, so elements not
;; selected by the mask keep the value of operand 1.  Operand 1 is tied
;; to the destination via constraint "0".
;; Operand 4 is a mask register (<avx512fmaskmode>, constraint "Yk") and
;; is printed in braces after the destination in the output template.
;; NOTE(review): the pattern name spells "vpamdd52" while the emitted
;; mnemonic is "vpmadd52"; kept as is because the name is part of the
;; generated interface.
19320 | (define_insn "vpamdd52<vpmadd52type><mode>_mask" | |
19321 | [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") | |
19322 | (vec_merge:VI8_AVX512VL | |
19323 | (unspec:VI8_AVX512VL | |
19324 | [(match_operand:VI8_AVX512VL 1 "register_operand" "0") | |
19325 | (match_operand:VI8_AVX512VL 2 "register_operand" "v") | |
19326 | (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] | |
19327 | VPMADD52) | |
19328 | (match_dup 1) | |
19329 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
19330 | "TARGET_AVX512IFMA" | |
19331 | "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" | |
19332 | [(set_attr "type" "ssemuladd") | |
19333 | (set_attr "prefix" "evex") | |
19334 | (set_attr "mode" "<sseinsnmode>")]) | |
19335 | ||
;; Instruction pattern for "vpmultishiftqb", enabled under
;; TARGET_AVX512VBMI.
;; Operand 0 is a VI1_AVX512VL register destination (not tied to any
;; input); operand 1 is a register source and operand 2 a register or
;; memory source.  The result is modelled as an UNSPEC_VPMULTISHIFT of
;; operands 1 and 2.
;; <mask_name> and <mask_operand3> come from a subst defined outside
;; this section; presumably they add the masked variant of the pattern
;; -- confirm against the subst definition.
afee0628 | 19336 | (define_insn "vpmultishiftqb<mode><mask_name>" |
19337 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
19338 | (unspec:VI1_AVX512VL | |
19339 | [(match_operand:VI1_AVX512VL 1 "register_operand" "v") | |
19340 | (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")] | |
19341 | UNSPEC_VPMULTISHIFT))] | |
19342 | "TARGET_AVX512VBMI" | |
19343 | "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
19344 | [(set_attr "type" "sselog") | |
19345 | (set_attr "prefix" "evex") | |
19346 | (set_attr "mode" "<sseinsnmode>")]) |