1 ;; GCC machine description for MMX and 3dNOW! instructions
2 ;; Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 ;; The MMX and 3dNOW! patterns are in the same file because they use
21 ;; the same register file, and 3dNOW! adds a number of extensions to
22 ;; the base integer MMX isa.
23
24 ;; Note! Except for the basic move instructions, *all* of these
25 ;; patterns are outside the normal optabs namespace. This is because
26 ;; use of these registers requires the insertion of emms or femms
27 ;; instructions to return to normal fpu mode. The compiler doesn't
28 ;; know how to do that itself, so it's up to the user. This
29 ;; means that we should never use any of these patterns except at the
30 ;; direction of the user via a builtin.
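;;
;; As a rough illustration (not taken from this file), user code reaches
;; these patterns through the MMX intrinsics and is itself responsible
;; for restoring the FPU state, e.g.:
;;
;;   #include <mmintrin.h>
;;   __m64 add8 (__m64 a, __m64 b)
;;   {
;;     __m64 c = _mm_add_pi8 (a, b);  /* expands to an MMX paddb pattern */
;;     _mm_empty ();                  /* emits emms before x87 code runs again */
;;     return c;
;;   }
;;
;; The particular intrinsics above are only an illustrative choice; the
;; point is that the emms/femms comes from the user, not from the compiler.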
31
32 (define_c_enum "unspec" [
33 UNSPEC_MOVNTQ
34 UNSPEC_PFRCP
35 UNSPEC_PFRCPIT1
36 UNSPEC_PFRCPIT2
37 UNSPEC_PFRSQRT
38 UNSPEC_PFRSQIT1
39 ])
40
41 (define_c_enum "unspecv" [
42 UNSPECV_EMMS
43 UNSPECV_FEMMS
44 ])
45
46 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
47 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
48 (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
49
50 ;; All 8-byte vector modes handled by MMX
51 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF V4BF])
52 (define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
53
54 ;; Mix-n-match
55 (define_mode_iterator MMXMODE12 [V8QI V4HI])
56 (define_mode_iterator MMXMODE14 [V8QI V2SI])
57 (define_mode_iterator MMXMODE24 [V4HI V2SI])
58 (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
59
60 ;; All 4-byte integer/float16 vector modes
61 (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
62
63 (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
64 (define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
65 (define_mode_iterator V4F_64 [V4HF V4BF])
66 (define_mode_iterator V2F_32 [V2HF V2BF])
67 ;; 4-byte integer vector modes
68 (define_mode_iterator VI_32 [V4QI V2HI])
69
70 ;; 4-byte and 2-byte integer vector modes
71 (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
72
73 ;; 4-byte and 2-byte QImode vector modes
74 (define_mode_iterator VI1_16_32 [V4QI V2QI])
75
76 ;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
77 (define_mode_iterator V_16_32_64
78 [V2QI V4QI V2HI V2HF
79 (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
80 (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
81 (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
82
83 ;; V2S* modes
84 (define_mode_iterator V2FI [V2SF V2SI])
85
86 (define_mode_iterator V24FI [V2SF V2SI V4HF V4HI])
87
88 (define_mode_iterator V248FI [V2SF V2SI V4HF V4BF V4HI V8QI])
89
90 (define_mode_iterator V24FI_32 [V2HF V2BF V2HI V4QI])
91
92 ;; Mapping from integer vector mode to mnemonic suffix
93 (define_mode_attr mmxvecsize
94 [(V8QI "b") (V4QI "b") (V2QI "b")
95 (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
96
97 ;; Mapping to same size integral mode.
98 (define_mode_attr mmxinsnmode
99 [(V8QI "DI") (V4QI "SI") (V2QI "HI")
100 (V4HI "DI") (V2HI "SI")
101 (V2SI "DI")
102 (V4HF "DI") (V2HF "SI")
103 (V4BF "DI") (V2BF "SI")
104 (V2SF "DI")])
105
106 (define_mode_attr mmxdoublemode
107 [(V8QI "V8HI") (V4HI "V4SI")])
108
109 ;; Mapping of vector float modes to an integer mode of the same size
110 (define_mode_attr mmxintvecmode
111 [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
112 (V4HF "V4HI") (V2HF "V2HI")])
113
114 (define_mode_attr mmxintvecmodelower
115 [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")
116 (V4HF "v4hi") (V2HF "v2hi")])
117
118 ;; Mapping of vector modes to a vector mode of double size
119 (define_mode_attr mmxdoublevecmode
120 [(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")
121 (V2HI "V4HI") (V2HF "V4HF") (V2BF "V4BF")])
122
123 ;; Mapping of vector modes back to the scalar modes
124 (define_mode_attr mmxscalarmode
125 [(V2SI "SI") (V2SF "SF")
126 (V4HF "HF") (V4BF "BF")
127 (V2HF "HF") (V2BF "BF")
128 (V4HI "HI") (V2HI "HI")
129 (V8QI "QI")])
130
131 (define_mode_attr mmxscalarmodelower
132 [(V2SI "si") (V2SF "sf")
133 (V4HF "hf") (V4BF "bf")
134 (V2HF "hf") (V2BF "bf")
135 (V4HI "hi") (V2HI "hi")
136 (V8QI "qi")])
137
138 (define_mode_attr Yv_Yw
139 [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
140
141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
142 ;;
143 ;; Move patterns
144 ;;
145 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
146
147 ;; All of these patterns are enabled for MMX as well as 3dNOW.
148 ;; This is essential for maintaining stable calling conventions.
149
150 (define_expand "mov<mode>"
151 [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
152 (match_operand:MMXMODE 1 "nonimmediate_operand"))]
153 "TARGET_MMX || TARGET_MMX_WITH_SSE"
154 {
155 ix86_expand_vector_move (<MODE>mode, operands);
156 DONE;
157 })
158
159 (define_insn "*mov<mode>_internal"
160 [(set (match_operand:MMXMODE 0 "nonimmediate_operand"
161 "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
162 (match_operand:MMXMODE 1 "nonimm_or_0_operand"
163 "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))]
164 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
165 && !(MEM_P (operands[0]) && MEM_P (operands[1]))
166 && ix86_hardreg_mov_ok (operands[0], operands[1])"
167 {
168 switch (get_attr_type (insn))
169 {
170 case TYPE_MULTI:
171 return "#";
172
173 case TYPE_IMOV:
174 if (get_attr_mode (insn) == MODE_SI)
175 return "mov{l}\t{%1, %k0|%k0, %1}";
176 else
177 return "mov{q}\t{%1, %0|%0, %1}";
178
179 case TYPE_MMX:
180 return "pxor\t%0, %0";
181
182 case TYPE_MMXMOV:
183 /* Handle broken assemblers that require movd instead of movq. */
184 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
185 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
186 return "movd\t{%1, %0|%0, %1}";
187 return "movq\t{%1, %0|%0, %1}";
188
189 case TYPE_SSECVT:
190 if (SSE_REG_P (operands[0]))
191 return "movq2dq\t{%1, %0|%0, %1}";
192 else
193 return "movdq2q\t{%1, %0|%0, %1}";
194
195 case TYPE_SSELOG1:
196 return standard_sse_constant_opcode (insn, operands);
197
198 case TYPE_SSEMOV:
199 return ix86_output_ssemov (insn, operands);
200
201 default:
202 gcc_unreachable ();
203 }
204 }
205 [(set (attr "isa")
206 (cond [(eq_attr "alternative" "0,1")
207 (const_string "nox64")
208 (eq_attr "alternative" "2,3,4,9,10")
209 (const_string "x64")
210 (eq_attr "alternative" "15,16")
211 (const_string "x64_sse2")
212 (eq_attr "alternative" "17,18")
213 (const_string "sse2")
214 ]
215 (const_string "*")))
216 (set (attr "type")
217 (cond [(eq_attr "alternative" "0,1")
218 (const_string "multi")
219 (eq_attr "alternative" "2,3,4")
220 (const_string "imov")
221 (eq_attr "alternative" "5")
222 (const_string "mmx")
223 (eq_attr "alternative" "6,7,8,9,10")
224 (const_string "mmxmov")
225 (eq_attr "alternative" "11")
226 (const_string "sselog1")
227 (eq_attr "alternative" "17,18")
228 (const_string "ssecvt")
229 ]
230 (const_string "ssemov")))
231 (set (attr "prefix_rex")
232 (if_then_else (eq_attr "alternative" "9,10,15,16")
233 (const_string "1")
234 (const_string "*")))
235 (set (attr "prefix")
236 (if_then_else (eq_attr "type" "sselog1,ssemov")
237 (const_string "maybe_vex")
238 (const_string "orig")))
239 (set (attr "prefix_data16")
240 (if_then_else
241 (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
242 (const_string "1")
243 (const_string "*")))
244 (set (attr "mode")
245 (cond [(eq_attr "alternative" "2")
246 (const_string "SI")
247 (eq_attr "alternative" "11,12")
248 (cond [(match_test "<MODE>mode == V2SFmode
249 || <MODE>mode == V4HFmode
250 || <MODE>mode == V4BFmode")
251 (const_string "V4SF")
252 (ior (not (match_test "TARGET_SSE2"))
253 (match_test "optimize_function_for_size_p (cfun)"))
254 (const_string "V4SF")
255 ]
256 (const_string "TI"))
257
258 (and (eq_attr "alternative" "13")
259 (ior (ior (and (match_test "<MODE>mode == V2SFmode")
260 (not (match_test "TARGET_MMX_WITH_SSE")))
261 (not (match_test "TARGET_SSE2")))
262 (match_test "<MODE>mode == V4HFmode
263 || <MODE>mode == V4BFmode")))
264 (const_string "V2SF")
265
266 (and (eq_attr "alternative" "14")
267 (ior (ior (match_test "<MODE>mode == V2SFmode")
268 (not (match_test "TARGET_SSE2")))
269 (match_test "<MODE>mode == V4HFmode
270 || <MODE>mode == V4BFmode")))
271 (const_string "V2SF")
272 ]
273 (const_string "DI")))
274 (set (attr "preferred_for_speed")
275 (cond [(eq_attr "alternative" "9,15")
276 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
277 (eq_attr "alternative" "10,16")
278 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
279 ]
280 (symbol_ref "true")))])
281
282 (define_split
283 [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
284 (match_operand:MMXMODE 1 "nonimmediate_gr_operand"))]
285 "!TARGET_64BIT && reload_completed"
286 [(const_int 0)]
287 "ix86_split_long_move (operands); DONE;")
288
289 (define_split
290 [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
291 (match_operand:MMXMODE 1 "const0_operand"))]
292 "!TARGET_64BIT && reload_completed"
293 [(const_int 0)]
294 "ix86_split_long_move (operands); DONE;")
295
296 (define_expand "movmisalign<mode>"
297 [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
298 (match_operand:MMXMODE 1 "nonimmediate_operand"))]
299 "TARGET_MMX || TARGET_MMX_WITH_SSE"
300 {
301 ix86_expand_vector_move (<MODE>mode, operands);
302 DONE;
303 })
304
305 (define_expand "mov<mode>"
306 [(set (match_operand:V_32 0 "nonimmediate_operand")
307 (match_operand:V_32 1 "nonimmediate_operand"))]
308 ""
309 {
310 ix86_expand_vector_move (<MODE>mode, operands);
311 DONE;
312 })
313
314 (define_insn "*mov<mode>_internal"
315 [(set (match_operand:V_32 0 "nonimmediate_operand"
316 "=r ,m ,v,v,v,m,r,v")
317 (match_operand:V_32 1 "general_operand"
318 "rmC,rC,C,v,m,v,v,r"))]
319 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
320 && ix86_hardreg_mov_ok (operands[0], operands[1])"
321 {
322 switch (get_attr_type (insn))
323 {
324 case TYPE_IMOV:
325 return "mov{l}\t{%1, %0|%0, %1}";
326
327 case TYPE_SSELOG1:
328 return standard_sse_constant_opcode (insn, operands);
329
330 case TYPE_SSEMOV:
331 return ix86_output_ssemov (insn, operands);
332
333 default:
334 gcc_unreachable ();
335 }
336 }
337 [(set (attr "isa")
338 (cond [(eq_attr "alternative" "6,7")
339 (const_string "sse2")
340 ]
341 (const_string "*")))
342 (set (attr "type")
343 (cond [(eq_attr "alternative" "2")
344 (const_string "sselog1")
345 (eq_attr "alternative" "3,4,5,6,7")
346 (const_string "ssemov")
347 ]
348 (const_string "imov")))
349 (set (attr "prefix")
350 (if_then_else (eq_attr "type" "sselog1,ssemov")
351 (const_string "maybe_vex")
352 (const_string "orig")))
353 (set (attr "prefix_data16")
354 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
355 (const_string "1")
356 (const_string "*")))
357 (set (attr "mode")
358 (cond [(eq_attr "alternative" "2,3")
359 (cond [(match_test "<MODE>mode == V2HFmode
360 || <MODE>mode == V2BFmode")
361 (const_string "V4SF")
362 (match_test "TARGET_AVX")
363 (const_string "TI")
364 (ior (not (match_test "TARGET_SSE2"))
365 (match_test "optimize_function_for_size_p (cfun)"))
366 (const_string "V4SF")
367 ]
368 (const_string "TI"))
369
370 (and (eq_attr "alternative" "4,5")
371 (ior (match_test "<MODE>mode == V2HFmode
372 || <MODE>mode == V2BFmode")
373 (not (match_test "TARGET_SSE2"))))
374 (const_string "SF")
375 ]
376 (const_string "SI")))
377 (set (attr "preferred_for_speed")
378 (cond [(eq_attr "alternative" "6")
379 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
380 (eq_attr "alternative" "7")
381 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
382 ]
383 (symbol_ref "true")))])
384
385 ;; 16-bit, 32-bit and 64-bit constant vector stores. After reload,
386 ;; convert them to immediate integer stores.
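;; For example (little endian, shown as a sketch rather than exact RTL):
;; a V4QI store of the constant {1, 2, 3, 4} becomes, after reload, an
;; SImode store of the immediate 0x04030201, using the <mmxinsnmode>
;; mapping above (V2QI -> HI, V4QI -> SI, V8QI -> DI).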
387 (define_insn_and_split "*mov<mode>_imm"
388 [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
389 (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
390 ""
391 "#"
392 "&& reload_completed"
393 [(set (match_dup 0) (match_dup 1))]
394 {
395 HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
396 <MODE>mode);
397 operands[1] = GEN_INT (val);
398 operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
399 })
400
401 ;; For TARGET_64BIT we always round up to 8 bytes.
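;; (E.g. pushing a 4-byte V4QI or V2HI value still adjusts %rsp by 8,
;; which is why the push alternative below uses %q1 with mode DI.)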
402 (define_insn "*push<mode>2_rex64"
403 [(set (match_operand:V_32 0 "push_operand" "=X,X")
404 (match_operand:V_32 1 "nonmemory_no_elim_operand" "rC,*v"))]
405 "TARGET_64BIT"
406 "@
407 push{q}\t%q1
408 #"
409 [(set_attr "type" "push,multi")
410 (set_attr "mode" "DI")])
411
412 (define_split
413 [(set (match_operand:V_32 0 "push_operand")
414 (match_operand:V_32 1 "sse_reg_operand"))]
415 "TARGET_64BIT && TARGET_SSE && reload_completed"
416 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
417 (set (match_dup 0) (match_dup 1))]
418 {
419 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<V_32:MODE>mode)));
420 /* Preserve memory attributes. */
421 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
422 })
423
424 (define_expand "movmisalign<mode>"
425 [(set (match_operand:V_32 0 "nonimmediate_operand")
426 (match_operand:V_32 1 "nonimmediate_operand"))]
427 ""
428 {
429 ix86_expand_vector_move (<MODE>mode, operands);
430 DONE;
431 })
432
433 (define_expand "movv2qi"
434 [(set (match_operand:V2QI 0 "nonimmediate_operand")
435 (match_operand:V2QI 1 "nonimmediate_operand"))]
436 ""
437 {
438 ix86_expand_vector_move (V2QImode, operands);
439 DONE;
440 })
441
442 (define_insn "*movv2qi_internal"
443 [(set (match_operand:V2QI 0 "nonimmediate_operand"
444 "=r,r,r,m ,v,v,v,jm,m,r,v")
445 (match_operand:V2QI 1 "general_operand"
446 "r ,C,m,rC,C,v,m,x,v,v,r"))]
447 "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
448 {
449 switch (get_attr_type (insn))
450 {
451 case TYPE_IMOV:
452 if (get_attr_mode (insn) == MODE_SI)
453 return "mov{l}\t{%k1, %k0|%k0, %k1}";
454 else
455 return "mov{w}\t{%1, %0|%0, %1}";
456
457 case TYPE_IMOVX:
458 /* movzwl is faster than movw on p2 due to partial word stalls,
459 though not as fast as an aligned movl. */
460 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
461
462 case TYPE_SSELOG1:
463 if (satisfies_constraint_C (operands[1]))
464 return standard_sse_constant_opcode (insn, operands);
465
466 if (SSE_REG_P (operands[0]))
467 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
468 else
469 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
470
471 case TYPE_SSEMOV:
472 return ix86_output_ssemov (insn, operands);
473
474 default:
475 gcc_unreachable ();
476 }
477 }
478 [(set (attr "isa")
479 (cond [(eq_attr "alternative" "6,9,10")
480 (const_string "sse2")
481 (eq_attr "alternative" "7")
482 (const_string "sse4_noavx")
483 (eq_attr "alternative" "8")
484 (const_string "avx")
485 ]
486 (const_string "*")))
487 (set (attr "addr")
488 (if_then_else (eq_attr "alternative" "7")
489 (const_string "gpr16")
490 (const_string "*")))
491 (set (attr "type")
492 (cond [(eq_attr "alternative" "6,7,8")
493 (if_then_else (match_test "TARGET_AVX512FP16")
494 (const_string "ssemov")
495 (const_string "sselog1"))
496 (eq_attr "alternative" "4")
497 (const_string "sselog1")
498 (eq_attr "alternative" "5,9,10")
499 (const_string "ssemov")
500 (match_test "optimize_function_for_size_p (cfun)")
501 (const_string "imov")
502 (and (eq_attr "alternative" "0")
503 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
504 (not (match_test "TARGET_HIMODE_MATH"))))
505 (const_string "imov")
506 (and (eq_attr "alternative" "1,2")
507 (match_operand:V2QI 1 "aligned_operand"))
508 (const_string "imov")
509 (and (match_test "TARGET_MOVX")
510 (eq_attr "alternative" "0,2"))
511 (const_string "imovx")
512 ]
513 (const_string "imov")))
514 (set (attr "prefix")
515 (cond [(eq_attr "alternative" "4,5,6,7,8,9,10")
516 (const_string "maybe_evex")
517 ]
518 (const_string "orig")))
519 (set (attr "mode")
520 (cond [(eq_attr "alternative" "6,7,8")
521 (if_then_else (match_test "TARGET_AVX512FP16")
522 (const_string "HI")
523 (const_string "TI"))
524 (eq_attr "alternative" "9,10")
525 (if_then_else (match_test "TARGET_AVX512FP16")
526 (const_string "HI")
527 (const_string "SI"))
528 (eq_attr "alternative" "4")
529 (cond [(match_test "TARGET_AVX")
530 (const_string "TI")
531 (ior (not (match_test "TARGET_SSE2"))
532 (match_test "optimize_function_for_size_p (cfun)"))
533 (const_string "V4SF")
534 ]
535 (const_string "TI"))
536 (eq_attr "alternative" "5")
537 (cond [(match_test "TARGET_AVX512FP16")
538 (const_string "HF")
539 (match_test "TARGET_AVX")
540 (const_string "TI")
541 (ior (not (match_test "TARGET_SSE2"))
542 (match_test "optimize_function_for_size_p (cfun)"))
543 (const_string "V4SF")
544 ]
545 (const_string "TI"))
546 (eq_attr "type" "imovx")
547 (const_string "SI")
548 (and (eq_attr "alternative" "1,2")
549 (match_operand:V2QI 1 "aligned_operand"))
550 (const_string "SI")
551 (and (eq_attr "alternative" "0")
552 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
553 (not (match_test "TARGET_HIMODE_MATH"))))
554 (const_string "SI")
555 ]
556 (const_string "HI")))
557 (set (attr "preferred_for_speed")
558 (cond [(eq_attr "alternative" "9")
559 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
560 (eq_attr "alternative" "10")
561 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
562 ]
563 (symbol_ref "true")))])
564
565 ;; We always round up to UNITS_PER_WORD bytes.
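;; (E.g. pushing a 2-byte V2QI value reserves 4 bytes with -m32 and
;; 8 bytes with -m64, matching the SI/DI "mode" attribute below.)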
566 (define_insn "*pushv2qi2"
567 [(set (match_operand:V2QI 0 "push_operand" "=X,X")
568 (match_operand:V2QI 1 "nonmemory_no_elim_operand" "rC,v"))]
569 ""
570 "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";
571 #"
572 [(set_attr "isa" "*,sse4")
573 (set_attr "type" "push,multi")
574 (set (attr "mode")
575 (cond [(eq_attr "alternative" "0")
576 (if_then_else (match_test "TARGET_64BIT")
577 (const_string "DI")
578 (const_string "SI"))
579 (eq_attr "alternative" "1")
580 (if_then_else (match_test "TARGET_AVX512FP16")
581 (const_string "HI")
582 (const_string "TI"))
583 ]
584 (const_string "HI")))])
585
586 (define_split
587 [(set (match_operand:V2QI 0 "push_operand")
588 (match_operand:V2QI 1 "sse_reg_operand"))]
589 "TARGET_SSE4_1 && reload_completed"
590 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
591 (set (match_dup 0) (match_dup 1))]
592 {
593 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V2QImode)));
594 /* Preserve memory attributes. */
595 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
596 })
597
598 (define_expand "movmisalignv2qi"
599 [(set (match_operand:V2QI 0 "nonimmediate_operand")
600 (match_operand:V2QI 1 "nonimmediate_operand"))]
601 ""
602 {
603 ix86_expand_vector_move (V2QImode, operands);
604 DONE;
605 })
606
607 (define_insn "sse_movntq"
608 [(set (match_operand:DI 0 "memory_operand" "=m,m")
609 (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
610 UNSPEC_MOVNTQ))]
611 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
612 && (TARGET_SSE || TARGET_3DNOW_A)"
613 "@
614 movntq\t{%1, %0|%0, %1}
615 movnti\t{%1, %0|%0, %1}"
616 [(set_attr "isa" "*,x64")
617 (set_attr "mmx_isa" "native,*")
618 (set_attr "type" "mmxmov,ssemov")
619 (set_attr "mode" "DI")])
620
621 (define_expand "movq_<mode>_to_sse"
622 [(set (match_operand:<mmxdoublevecmode> 0 "register_operand")
623 (vec_concat:<mmxdoublevecmode>
624 (match_operand:V24FI 1 "nonimmediate_operand")
625 (match_dup 2)))]
626 "TARGET_SSE2"
627 {
628 if (<MODE>mode != V2SImode
629 && !flag_trapping_math)
630 {
631 rtx op1 = force_reg (<MODE>mode, operands[1]);
632 emit_move_insn (operands[0], lowpart_subreg (<mmxdoublevecmode>mode,
633 op1, <MODE>mode));
634 DONE;
635 }
636
637 operands[2] = CONST0_RTX (<MODE>mode);
638 })
639
640 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
641 ;;
642 ;; Parallel single-precision floating point arithmetic
643 ;;
644 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
645
646 (define_expand "<code>v2sf2"
647 [(set (match_operand:V2SF 0 "register_operand")
648 (absneg:V2SF
649 (match_operand:V2SF 1 "register_operand")))]
650 "TARGET_MMX_WITH_SSE"
651 "ix86_expand_fp_absneg_operator (<CODE>, V2SFmode, operands); DONE;")
652
653 (define_insn_and_split "*mmx_<code>v2sf2"
654 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x")
655 (absneg:V2SF
656 (match_operand:V2SF 1 "register_operand" "0,x,x")))
657 (use (match_operand:V2SF 2 "nonimmediate_operand" "x,0,x"))]
658 "TARGET_MMX_WITH_SSE"
659 "#"
660 "&& reload_completed"
661 [(set (match_dup 0)
662 (<absneg_op>:V2SF (match_dup 1) (match_dup 2)))]
663 {
664 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
665 std::swap (operands[1], operands[2]);
666 }
667 [(set_attr "isa" "noavx,noavx,avx")])
668
669 (define_insn_and_split "*mmx_nabsv2sf2"
670 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x")
671 (neg:V2SF
672 (abs:V2SF
673 (match_operand:V2SF 1 "register_operand" "0,x,x"))))
674 (use (match_operand:V2SF 2 "nonimmediate_operand" "x,0,x"))]
675 "TARGET_MMX_WITH_SSE"
676 "#"
677 "&& reload_completed"
678 [(set (match_dup 0)
679 (ior:V2SF (match_dup 1) (match_dup 2)))]
680 {
681 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
682 std::swap (operands[1], operands[2]);
683 }
684 [(set_attr "isa" "noavx,noavx,avx")])
685
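;; Several of the TARGET_MMX_WITH_SSE expanders below share one lowering
;; sketch: move each 64-bit operand into the low half of a V4SF register
;; (movq_v2sf_to_sse), perform the full-width V4SF operation, and return
;; the low 64 bits of the result via lowpart_subreg.  In C-like pseudo
;; code (illustrative only):
;;
;;   op1_4 = { op1[0], op1[1], 0.0f, 0.0f };
;;   op2_4 = { op2[0], op2[1], 0.0f, 0.0f };
;;   res_4 = op1_4 <op> op2_4;
;;   dest  = { res_4[0], res_4[1] };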
686 (define_expand "<insn>v2sf3"
687 [(set (match_operand:V2SF 0 "register_operand")
688 (plusminusmult:V2SF
689 (match_operand:V2SF 1 "nonimmediate_operand")
690 (match_operand:V2SF 2 "nonimmediate_operand")))]
691 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
692 {
693 rtx op2 = gen_reg_rtx (V4SFmode);
694 rtx op1 = gen_reg_rtx (V4SFmode);
695 rtx op0 = gen_reg_rtx (V4SFmode);
696
697 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
698 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
699
700 emit_insn (gen_<insn>v4sf3 (op0, op1, op2));
701
702 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
703 DONE;
704 })
705
706 (define_expand "mmx_addv2sf3"
707 [(set (match_operand:V2SF 0 "register_operand")
708 (plus:V2SF
709 (match_operand:V2SF 1 "nonimmediate_operand")
710 (match_operand:V2SF 2 "nonimmediate_operand")))]
711 "TARGET_3DNOW"
712 "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
713
714 (define_insn "*mmx_addv2sf3"
715 [(set (match_operand:V2SF 0 "register_operand" "=y")
716 (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
717 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
718 "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
719 "pfadd\t{%2, %0|%0, %2}"
720 [(set_attr "type" "mmxadd")
721 (set_attr "prefix_extra" "1")
722 (set_attr "mode" "V2SF")])
723
724 (define_expand "mmx_subv2sf3"
725 [(set (match_operand:V2SF 0 "register_operand")
726 (minus:V2SF (match_operand:V2SF 1 "register_operand")
727 (match_operand:V2SF 2 "nonimmediate_operand")))]
728 "TARGET_3DNOW")
729
730 (define_expand "mmx_subrv2sf3"
731 [(set (match_operand:V2SF 0 "register_operand")
732 (minus:V2SF (match_operand:V2SF 2 "register_operand")
733 (match_operand:V2SF 1 "nonimmediate_operand")))]
734 "TARGET_3DNOW")
735
736 (define_insn "*mmx_subv2sf3"
737 [(set (match_operand:V2SF 0 "register_operand" "=y,y")
738 (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
739 (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
740 "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
741 "@
742 pfsub\t{%2, %0|%0, %2}
743 pfsubr\t{%1, %0|%0, %1}"
744 [(set_attr "type" "mmxadd")
745 (set_attr "prefix_extra" "1")
746 (set_attr "mode" "V2SF")])
747
748 (define_expand "mmx_mulv2sf3"
749 [(set (match_operand:V2SF 0 "register_operand")
750 (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
751 (match_operand:V2SF 2 "nonimmediate_operand")))]
752 "TARGET_3DNOW"
753 "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
754
755 (define_insn "*mmx_mulv2sf3"
756 [(set (match_operand:V2SF 0 "register_operand" "=y")
757 (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
758 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
759 "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
760 "pfmul\t{%2, %0|%0, %2}"
761 [(set_attr "type" "mmxmul")
762 (set_attr "prefix_extra" "1")
763 (set_attr "mode" "V2SF")])
764
765 (define_expand "divv2sf3"
766 [(set (match_operand:V2SF 0 "register_operand")
767 (div:V2SF (match_operand:V2SF 1 "register_operand")
768 (match_operand:V2SF 2 "register_operand")))]
769 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
770 {
771 rtx op2 = gen_reg_rtx (V4SFmode);
772 rtx op1 = gen_reg_rtx (V4SFmode);
773 rtx op0 = gen_reg_rtx (V4SFmode);
774
775 rtx tmp = gen_rtx_VEC_CONCAT (V4SFmode, operands[2],
776 force_reg (V2SFmode, CONST1_RTX (V2SFmode)));
777 emit_insn (gen_rtx_SET (op2, tmp));
778 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
779
780 emit_insn (gen_divv4sf3 (op0, op1, op2));
781
782 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
783 DONE;
784 })
785
786 (define_expand "<code>v2sf3"
787 [(set (match_operand:V2SF 0 "register_operand")
788 (smaxmin:V2SF
789 (match_operand:V2SF 1 "register_operand")
790 (match_operand:V2SF 2 "register_operand")))]
791 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
792 {
793 rtx op2 = gen_reg_rtx (V4SFmode);
794 rtx op1 = gen_reg_rtx (V4SFmode);
795 rtx op0 = gen_reg_rtx (V4SFmode);
796
797 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
798 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
799
800 emit_insn (gen_<code>v4sf3 (op0, op1, op2));
801
802 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
803 DONE;
804 })
805
806 (define_expand "mmx_<code>v2sf3"
807 [(set (match_operand:V2SF 0 "register_operand")
808 (smaxmin:V2SF
809 (match_operand:V2SF 1 "nonimmediate_operand")
810 (match_operand:V2SF 2 "nonimmediate_operand")))]
811 "TARGET_3DNOW"
812 {
813 if (!flag_finite_math_only || flag_signed_zeros)
814 {
815 operands[1] = force_reg (V2SFmode, operands[1]);
816 emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
817 (operands[0], operands[1], operands[2]));
818 DONE;
819 }
820 else
821 ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
822 })
823
824 ;; These versions of the min/max patterns are intentionally ignorant of
825 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
826 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
827 ;; are undefined in this condition, we're certain this is correct.
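;; For instance, smax (-0.0, +0.0) may return either zero and
;; smax (x, NaN) may return either operand, so letting the register
;; allocator swap the "%0" and "ym" operands cannot change any result
;; that is actually defined.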
828
829 (define_insn "*mmx_<code>v2sf3"
830 [(set (match_operand:V2SF 0 "register_operand" "=y")
831 (smaxmin:V2SF
832 (match_operand:V2SF 1 "nonimmediate_operand" "%0")
833 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
834 "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
835 "pf<maxmin_float>\t{%2, %0|%0, %2}"
836 [(set_attr "type" "mmxadd")
837 (set_attr "prefix_extra" "1")
838 (set_attr "mode" "V2SF")])
839
840 ;; These versions of the min/max patterns implement exactly the operations
841 ;; min = (op1 < op2 ? op1 : op2)
842 ;; max = (!(op1 < op2) ? op1 : op2)
843 ;; Their operands are not commutative, and thus they may be used in the
844 ;; presence of -0.0 and NaN.
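;;
;; Worked examples of why the operand order matters:
;;   min (+0.0, -0.0): the compare is false, so the result is op2 = -0.0,
;;   whereas min (-0.0, +0.0) gives +0.0.
;;   min (NaN, x): the compare is false, so the result is op2 = x,
;;   whereas min (x, NaN) gives NaN.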
845
846 (define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
847 [(set (match_operand:V2SF 0 "register_operand" "=y")
848 (unspec:V2SF
849 [(match_operand:V2SF 1 "register_operand" "0")
850 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
851 IEEE_MAXMIN))]
852 "TARGET_3DNOW"
853 "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
854 [(set_attr "type" "mmxadd")
855 (set_attr "prefix_extra" "1")
856 (set_attr "mode" "V2SF")])
857
858 (define_insn "mmx_rcpv2sf2"
859 [(set (match_operand:V2SF 0 "register_operand" "=y")
860 (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
861 UNSPEC_PFRCP))]
862 "TARGET_3DNOW"
863 "pfrcp\t{%1, %0|%0, %1}"
864 [(set_attr "type" "mmx")
865 (set_attr "prefix_extra" "1")
866 (set_attr "mode" "V2SF")])
867
868 (define_insn "mmx_rcpit1v2sf3"
869 [(set (match_operand:V2SF 0 "register_operand" "=y")
870 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
871 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
872 UNSPEC_PFRCPIT1))]
873 "TARGET_3DNOW"
874 "pfrcpit1\t{%2, %0|%0, %2}"
875 [(set_attr "type" "mmx")
876 (set_attr "prefix_extra" "1")
877 (set_attr "mode" "V2SF")])
878
879 (define_insn "mmx_rcpit2v2sf3"
880 [(set (match_operand:V2SF 0 "register_operand" "=y")
881 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
882 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
883 UNSPEC_PFRCPIT2))]
884 "TARGET_3DNOW"
885 "pfrcpit2\t{%2, %0|%0, %2}"
886 [(set_attr "type" "mmx")
887 (set_attr "prefix_extra" "1")
888 (set_attr "mode" "V2SF")])
889
890 (define_expand "sqrtv2sf2"
891 [(set (match_operand:V2SF 0 "register_operand")
892 (sqrt:V2SF (match_operand:V2SF 1 "nonimmediate_operand")))]
893 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
894 {
895 rtx op1 = gen_reg_rtx (V4SFmode);
896 rtx op0 = gen_reg_rtx (V4SFmode);
897
898 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
899
900 emit_insn (gen_sqrtv4sf2 (op0, op1));
901
902 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
903 DONE;
904 })
905
906 (define_insn "mmx_rsqrtv2sf2"
907 [(set (match_operand:V2SF 0 "register_operand" "=y")
908 (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
909 UNSPEC_PFRSQRT))]
910 "TARGET_3DNOW"
911 "pfrsqrt\t{%1, %0|%0, %1}"
912 [(set_attr "type" "mmx")
913 (set_attr "prefix_extra" "1")
914 (set_attr "mode" "V2SF")])
915
916 (define_insn "mmx_rsqit1v2sf3"
917 [(set (match_operand:V2SF 0 "register_operand" "=y")
918 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
919 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
920 UNSPEC_PFRSQIT1))]
921 "TARGET_3DNOW"
922 "pfrsqit1\t{%2, %0|%0, %2}"
923 [(set_attr "type" "mmx")
924 (set_attr "prefix_extra" "1")
925 (set_attr "mode" "V2SF")])
926
927 (define_expand "mmx_haddv2sf3"
928 [(set (match_operand:V2SF 0 "register_operand")
929 (vec_concat:V2SF
930 (plus:SF
931 (vec_select:SF
932 (match_operand:V2SF 1 "register_operand")
933 (parallel [(const_int 0)]))
934 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
935 (plus:SF
936 (vec_select:SF
937 (match_operand:V2SF 2 "nonimmediate_operand")
938 (parallel [(const_int 0)]))
939 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
940 "TARGET_3DNOW")
941
942 (define_insn "*mmx_haddv2sf3"
943 [(set (match_operand:V2SF 0 "register_operand" "=y")
944 (vec_concat:V2SF
945 (plus:SF
946 (vec_select:SF
947 (match_operand:V2SF 1 "register_operand" "0")
948 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
949 (vec_select:SF (match_dup 1)
950 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
951 (plus:SF
952 (vec_select:SF
953 (match_operand:V2SF 2 "nonimmediate_operand" "ym")
954 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
955 (vec_select:SF (match_dup 2)
956 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
957 "TARGET_3DNOW
958 && INTVAL (operands[3]) != INTVAL (operands[4])
959 && INTVAL (operands[5]) != INTVAL (operands[6])"
960 "pfacc\t{%2, %0|%0, %2}"
961 [(set_attr "type" "mmxadd")
962 (set_attr "prefix_extra" "1")
963 (set_attr "mode" "V2SF")])
964
965 (define_insn_and_split "*mmx_haddv2sf3_low"
966 [(set (match_operand:SF 0 "register_operand")
967 (plus:SF
968 (vec_select:SF
969 (match_operand:V2SF 1 "nonimmediate_operand")
970 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
971 (vec_select:SF
972 (match_dup 1)
973 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
974 "TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math
975 && INTVAL (operands[2]) != INTVAL (operands[3])
976 && ix86_pre_reload_split ()"
977 "#"
978 "&& 1"
979 [(const_int 0)]
980 {
981 rtx op1 = gen_reg_rtx (V4SFmode);
982 rtx op0 = gen_reg_rtx (V4SFmode);
983
984 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
985
986 emit_insn (gen_sse3_haddv4sf3 (op0, op1, op1));
987
988 emit_move_insn (operands[0], lowpart_subreg (SFmode, op0, V4SFmode));
989 DONE;
990 })
991
992 (define_insn "mmx_hsubv2sf3"
993 [(set (match_operand:V2SF 0 "register_operand" "=y")
994 (vec_concat:V2SF
995 (minus:SF
996 (vec_select:SF
997 (match_operand:V2SF 1 "register_operand" "0")
998 (parallel [(const_int 0)]))
999 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1000 (minus:SF
1001 (vec_select:SF
1002 (match_operand:V2SF 2 "nonimmediate_operand" "ym")
1003 (parallel [(const_int 0)]))
1004 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
1005 "TARGET_3DNOW_A"
1006 "pfnacc\t{%2, %0|%0, %2}"
1007 [(set_attr "type" "mmxadd")
1008 (set_attr "prefix_extra" "1")
1009 (set_attr "mode" "V2SF")])
1010
1011 (define_insn_and_split "*mmx_hsubv2sf3_low"
1012 [(set (match_operand:SF 0 "register_operand")
1013 (minus:SF
1014 (vec_select:SF
1015 (match_operand:V2SF 1 "register_operand")
1016 (parallel [(const_int 0)]))
1017 (vec_select:SF
1018 (match_dup 1)
1019 (parallel [(const_int 1)]))))]
1020 "TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math
1021 && ix86_pre_reload_split ()"
1022 "#"
1023 "&& 1"
1024 [(const_int 0)]
1025 {
1026 rtx op1 = gen_reg_rtx (V4SFmode);
1027 rtx op0 = gen_reg_rtx (V4SFmode);
1028
1029 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1030
1031 emit_insn (gen_sse3_hsubv4sf3 (op0, op1, op1));
1032
1033 emit_move_insn (operands[0], lowpart_subreg (SFmode, op0, V4SFmode));
1034 DONE;
1035 })
1036
1037 (define_expand "mmx_haddsubv2sf3"
1038 [(set (match_operand:V2SF 0 "register_operand")
1039 (vec_concat:V2SF
1040 (minus:SF
1041 (vec_select:SF
1042 (match_operand:V2SF 1 "register_operand")
1043 (parallel [(const_int 0)]))
1044 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1045 (plus:SF
1046 (vec_select:SF
1047 (match_operand:V2SF 2 "nonimmediate_operand")
1048 (parallel [(const_int 0)]))
1049 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
1050 "TARGET_3DNOW_A")
1051
1052 (define_insn "*mmx_haddsubv2sf3"
1053 [(set (match_operand:V2SF 0 "register_operand" "=y")
1054 (vec_concat:V2SF
1055 (minus:SF
1056 (vec_select:SF
1057 (match_operand:V2SF 1 "register_operand" "0")
1058 (parallel [(const_int 0)]))
1059 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1060 (plus:SF
1061 (vec_select:SF
1062 (match_operand:V2SF 2 "nonimmediate_operand" "ym")
1063 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1064 (vec_select:SF
1065 (match_dup 2)
1066 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))))]
1067 "TARGET_3DNOW_A
1068 && INTVAL (operands[3]) != INTVAL (operands[4])"
1069 "pfpnacc\t{%2, %0|%0, %2}"
1070 [(set_attr "type" "mmxadd")
1071 (set_attr "prefix_extra" "1")
1072 (set_attr "mode" "V2SF")])
1073
1074 (define_expand "vec_addsubv2sf3"
1075 [(set (match_operand:V2SF 0 "register_operand")
1076 (vec_merge:V2SF
1077 (minus:V2SF
1078 (match_operand:V2SF 1 "nonimmediate_operand")
1079 (match_operand:V2SF 2 "nonimmediate_operand"))
1080 (plus:V2SF (match_dup 1) (match_dup 2))
1081 (const_int 1)))]
1082 "TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1083 {
1084 rtx op2 = gen_reg_rtx (V4SFmode);
1085 rtx op1 = gen_reg_rtx (V4SFmode);
1086 rtx op0 = gen_reg_rtx (V4SFmode);
1087
1088 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1089 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1090
1091 emit_insn (gen_vec_addsubv4sf3 (op0, op1, op2));
1092
1093 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1094 DONE;
1095 })
1096
1097 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1098 ;;
1099 ;; Parallel single-precision floating point comparisons
1100 ;;
1101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1102
1103 (define_expand "mmx_eqv2sf3"
1104 [(set (match_operand:V2SI 0 "register_operand")
1105 (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand")
1106 (match_operand:V2SF 2 "nonimmediate_operand")))]
1107 "TARGET_3DNOW"
1108 "ix86_fixup_binary_operands_no_copy (EQ, V2SFmode, operands);")
1109
1110 (define_insn "*mmx_eqv2sf3"
1111 [(set (match_operand:V2SI 0 "register_operand" "=y")
1112 (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
1113 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
1114 "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
1115 "pfcmpeq\t{%2, %0|%0, %2}"
1116 [(set_attr "type" "mmxcmp")
1117 (set_attr "prefix_extra" "1")
1118 (set_attr "mode" "V2SF")])
1119
1120 (define_insn "mmx_gtv2sf3"
1121 [(set (match_operand:V2SI 0 "register_operand" "=y")
1122 (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
1123 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
1124 "TARGET_3DNOW"
1125 "pfcmpgt\t{%2, %0|%0, %2}"
1126 [(set_attr "type" "mmxcmp")
1127 (set_attr "prefix_extra" "1")
1128 (set_attr "mode" "V2SF")])
1129
1130 (define_insn "mmx_gev2sf3"
1131 [(set (match_operand:V2SI 0 "register_operand" "=y")
1132 (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
1133 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
1134 "TARGET_3DNOW"
1135 "pfcmpge\t{%2, %0|%0, %2}"
1136 [(set_attr "type" "mmxcmp")
1137 (set_attr "prefix_extra" "1")
1138 (set_attr "mode" "V2SF")])
1139
1140 (define_expand "vec_cmpv2sfv2si"
1141 [(set (match_operand:V2SI 0 "register_operand")
1142 (match_operator:V2SI 1 ""
1143 [(match_operand:V2SF 2 "nonimmediate_operand")
1144 (match_operand:V2SF 3 "nonimmediate_operand")]))]
1145 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1146 {
1147 rtx ops[4];
1148 ops[3] = gen_reg_rtx (V4SFmode);
1149 ops[2] = gen_reg_rtx (V4SFmode);
1150 ops[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), V4SImode, ops[2], ops[3]);
1151 ops[0] = gen_reg_rtx (V4SImode);
1152
1153 emit_insn (gen_movq_v2sf_to_sse (ops[3], operands[3]));
1154 emit_insn (gen_movq_v2sf_to_sse (ops[2], operands[2]));
1155
1156 bool ok = ix86_expand_fp_vec_cmp (ops);
1157 gcc_assert (ok);
1158
1159 emit_move_insn (operands[0], lowpart_subreg (V2SImode, ops[0], V4SImode));
1160 DONE;
1161 })
1162
1163 (define_expand "vcond<mode>v2sf"
1164 [(set (match_operand:V2FI 0 "register_operand")
1165 (if_then_else:V2FI
1166 (match_operator 3 ""
1167 [(match_operand:V2SF 4 "nonimmediate_operand")
1168 (match_operand:V2SF 5 "nonimmediate_operand")])
1169 (match_operand:V2FI 1 "general_operand")
1170 (match_operand:V2FI 2 "general_operand")))]
1171 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1172 {
1173 rtx ops[6];
1174 ops[5] = gen_reg_rtx (V4SFmode);
1175 ops[4] = gen_reg_rtx (V4SFmode);
1176 ops[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), VOIDmode, ops[4], ops[5]);
1177 ops[2] = lowpart_subreg (<mmxdoublevecmode>mode,
1178 force_reg (<MODE>mode, operands[2]),
1179 <MODE>mode);
1180 ops[1] = lowpart_subreg (<mmxdoublevecmode>mode,
1181 force_reg (<MODE>mode, operands[1]),
1182 <MODE>mode);
1183 ops[0] = gen_reg_rtx (<mmxdoublevecmode>mode);
1184
1185 emit_insn (gen_movq_v2sf_to_sse (ops[5], operands[5]));
1186 emit_insn (gen_movq_v2sf_to_sse (ops[4], operands[4]));
1187
1188 bool ok = ix86_expand_fp_vcond (ops);
1189 gcc_assert (ok);
1190
1191 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
1192 <mmxdoublevecmode>mode));
1193 DONE;
1194 })
1195
1196 (define_insn "@sse4_1_insertps_<mode>"
1197 [(set (match_operand:V2FI 0 "register_operand" "=Yr,*x,v")
1198 (unspec:V2FI
1199 [(match_operand:V2FI 2 "nonimmediate_operand" "Yrjm,*xjm,vm")
1200 (match_operand:V2FI 1 "register_operand" "0,0,v")
1201 (match_operand:SI 3 "const_0_to_255_operand")]
1202 UNSPEC_INSERTPS))]
1203 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
1204 {
1205 if (MEM_P (operands[2]))
1206 {
1207 unsigned count_s = INTVAL (operands[3]) >> 6;
1208 if (count_s)
1209 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
1210 operands[2] = adjust_address_nv (operands[2],
1211 <mmxscalarmode>mode, count_s * 4);
1212 }
1213 switch (which_alternative)
1214 {
1215 case 0:
1216 case 1:
1217 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1218 case 2:
1219 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
1220 default:
1221 gcc_unreachable ();
1222 }
1223 }
1224 [(set_attr "isa" "noavx,noavx,avx")
1225 (set_attr "addr" "gpr16,gpr16,*")
1226 (set_attr "type" "sselog")
1227 (set_attr "prefix_data16" "1,1,*")
1228 (set_attr "prefix_extra" "1")
1229 (set_attr "length_immediate" "1")
1230 (set_attr "prefix" "orig,orig,maybe_evex")
1231 (set_attr "mode" "V4SF")])
1232
1233 (define_insn "*mmx_blendps"
1234 [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
1235 (vec_merge:V2SF
1236 (match_operand:V2SF 2 "register_operand" "Yr,*x,x")
1237 (match_operand:V2SF 1 "register_operand" "0,0,x")
1238 (match_operand:SI 3 "const_0_to_3_operand")))]
1239 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
1240 "@
1241 blendps\t{%3, %2, %0|%0, %2, %3}
1242 blendps\t{%3, %2, %0|%0, %2, %3}
1243 vblendps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1244 [(set_attr "isa" "noavx,noavx,avx")
1245 (set_attr "type" "ssemov")
1246 (set_attr "length_immediate" "1")
1247 (set_attr "prefix_data16" "1,1,*")
1248 (set_attr "prefix_extra" "1")
1249 (set_attr "prefix" "orig,orig,vex")
1250 (set_attr "mode" "V4SF")])
1251
1252 (define_insn "mmx_blendvps"
1253 [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
1254 (unspec:V2SF
1255 [(match_operand:V2SF 1 "register_operand" "0,0,x")
1256 (match_operand:V2SF 2 "register_operand" "Yr,*x,x")
1257 (match_operand:V2SF 3 "register_operand" "Yz,Yz,x")]
1258 UNSPEC_BLENDV))]
1259 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
1260 "@
1261 blendvps\t{%3, %2, %0|%0, %2, %3}
1262 blendvps\t{%3, %2, %0|%0, %2, %3}
1263 vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1264 [(set_attr "isa" "noavx,noavx,avx")
1265 (set_attr "type" "ssemov")
1266 (set_attr "length_immediate" "1")
1267 (set_attr "prefix_data16" "1,1,*")
1268 (set_attr "prefix_extra" "1")
1269 (set_attr "prefix" "orig,orig,vex")
1270 (set_attr "btver2_decode" "vector")
1271 (set_attr "mode" "V4SF")])
1272
1273 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1274 ;;
1275 ;; Parallel single-precision floating point logical operations
1276 ;;
1277 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1278
1279 (define_insn "*mmx_andnotv2sf3"
1280 [(set (match_operand:V2SF 0 "register_operand" "=x,x")
1281 (and:V2SF
1282 (not:V2SF
1283 (match_operand:V2SF 1 "register_operand" "0,x"))
1284 (match_operand:V2SF 2 "register_operand" "x,x")))]
1285 "TARGET_MMX_WITH_SSE"
1286 "@
1287 andnps\t{%2, %0|%0, %2}
1288 vandnps\t{%2, %1, %0|%0, %1, %2}"
1289 [(set_attr "isa" "noavx,avx")
1290 (set_attr "type" "sselog")
1291 (set_attr "prefix" "orig,vex")
1292 (set_attr "mode" "V4SF")])
1293
1294 (define_insn "<code>v2sf3"
1295 [(set (match_operand:V2SF 0 "register_operand" "=x,x")
1296 (any_logic:V2SF
1297 (match_operand:V2SF 1 "register_operand" "%0,x")
1298 (match_operand:V2SF 2 "register_operand" "x,x")))]
1299 "TARGET_MMX_WITH_SSE"
1300 "@
1301 <logic>ps\t{%2, %0|%0, %2}
1302 v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
1303 [(set_attr "isa" "noavx,avx")
1304 (set_attr "type" "sselog")
1305 (set_attr "prefix" "orig,vex")
1306 (set_attr "mode" "V4SF")])
1307
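;; The copysign, xorsign and signbit expanders below use the usual
;; bitwise sign-manipulation identities, where MASK is a vector with
;; only the sign bit set in each element:
;;   copysign (x, y) = (x & ~MASK) | (y & MASK)
;;   xorsign (x, y)  = x ^ (y & MASK)
;;   signbit (x)     = (unsigned) bits (x) >> 31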
1308 (define_expand "copysignv2sf3"
1309 [(set (match_dup 4)
1310 (and:V2SF
1311 (not:V2SF (match_dup 3))
1312 (match_operand:V2SF 1 "register_operand")))
1313 (set (match_dup 5)
1314 (and:V2SF (match_dup 3)
1315 (match_operand:V2SF 2 "register_operand")))
1316 (set (match_operand:V2SF 0 "register_operand")
1317 (ior:V2SF (match_dup 4) (match_dup 5)))]
1318 "TARGET_MMX_WITH_SSE"
1319 {
1320 operands[3] = ix86_build_signbit_mask (V2SFmode, true, false);
1321
1322 operands[4] = gen_reg_rtx (V2SFmode);
1323 operands[5] = gen_reg_rtx (V2SFmode);
1324 })
1325
1326 (define_expand "xorsignv2sf3"
1327 [(set (match_dup 4)
1328 (and:V2SF (match_dup 3)
1329 (match_operand:V2SF 2 "register_operand")))
1330 (set (match_operand:V2SF 0 "register_operand")
1331 (xor:V2SF (match_dup 4)
1332 (match_operand:V2SF 1 "register_operand")))]
1333 "TARGET_MMX_WITH_SSE"
1334 {
1335 operands[3] = ix86_build_signbit_mask (V2SFmode, true, false);
1336
1337 operands[4] = gen_reg_rtx (V2SFmode);
1338 })
1339
1340 (define_expand "signbitv2sf2"
1341 [(set (match_operand:V2SI 0 "register_operand")
1342 (lshiftrt:V2SI
1343 (subreg:V2SI
1344 (match_operand:V2SF 1 "register_operand") 0)
1345 (match_dup 2)))]
1346 "TARGET_MMX_WITH_SSE"
1347 {
1348 operands[1] = force_reg (V2SFmode, operands[1]);
1349 operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (V2SFmode)-1);
1350 })
1351
1352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1353 ;;
1354 ;; Parallel single-precision FMA multiply/accumulate instructions.
1355 ;;
1356 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1357
1358 (define_expand "fmav2sf4"
1359 [(set (match_operand:V2SF 0 "register_operand")
1360 (fma:V2SF
1361 (match_operand:V2SF 1 "nonimmediate_operand")
1362 (match_operand:V2SF 2 "nonimmediate_operand")
1363 (match_operand:V2SF 3 "nonimmediate_operand")))]
1364 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
1365 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1366 {
1367 rtx op3 = gen_reg_rtx (V4SFmode);
1368 rtx op2 = gen_reg_rtx (V4SFmode);
1369 rtx op1 = gen_reg_rtx (V4SFmode);
1370 rtx op0 = gen_reg_rtx (V4SFmode);
1371
1372 emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
1373 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1374 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1375
1376 emit_insn (gen_fmav4sf4 (op0, op1, op2, op3));
1377
1378 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1379 DONE;
1380 })
1381
1382 (define_expand "fmsv2sf4"
1383 [(set (match_operand:V2SF 0 "register_operand")
1384 (fma:V2SF
1385 (match_operand:V2SF 1 "nonimmediate_operand")
1386 (match_operand:V2SF 2 "nonimmediate_operand")
1387 (neg:V2SF
1388 (match_operand:V2SF 3 "nonimmediate_operand"))))]
1389 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
1390 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1391 {
1392 rtx op3 = gen_reg_rtx (V4SFmode);
1393 rtx op2 = gen_reg_rtx (V4SFmode);
1394 rtx op1 = gen_reg_rtx (V4SFmode);
1395 rtx op0 = gen_reg_rtx (V4SFmode);
1396
1397 emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
1398 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1399 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1400
1401 emit_insn (gen_fmsv4sf4 (op0, op1, op2, op3));
1402
1403 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1404 DONE;
1405 })
1406
1407 (define_expand "fnmav2sf4"
1408 [(set (match_operand:V2SF 0 "register_operand")
1409 (fma:V2SF
1410 (neg:V2SF
1411 (match_operand:V2SF 1 "nonimmediate_operand"))
1412 (match_operand:V2SF 2 "nonimmediate_operand")
1413 (match_operand:V2SF 3 "nonimmediate_operand")))]
1414 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
1415 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1416 {
1417 rtx op3 = gen_reg_rtx (V4SFmode);
1418 rtx op2 = gen_reg_rtx (V4SFmode);
1419 rtx op1 = gen_reg_rtx (V4SFmode);
1420 rtx op0 = gen_reg_rtx (V4SFmode);
1421
1422 emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
1423 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1424 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1425
1426 emit_insn (gen_fnmav4sf4 (op0, op1, op2, op3));
1427
1428 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1429 DONE;
1430 })
1431
1432 (define_expand "fnmsv2sf4"
1433 [(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
1434 (fma:V2SF
1435 (neg:V2SF
1436 (match_operand:V2SF 1 "nonimmediate_operand"))
1437 (match_operand:V2SF 2 "nonimmediate_operand")
1438 (neg:V2SF
1439 (match_operand:V2SF 3 "nonimmediate_operand"))))]
1440 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
1441 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1442 {
1443 rtx op3 = gen_reg_rtx (V4SFmode);
1444 rtx op2 = gen_reg_rtx (V4SFmode);
1445 rtx op1 = gen_reg_rtx (V4SFmode);
1446 rtx op0 = gen_reg_rtx (V4SFmode);
1447
1448 emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
1449 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1450 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1451
1452 emit_insn (gen_fnmsv4sf4 (op0, op1, op2, op3));
1453
1454 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1455 DONE;
1456 })
1457
1458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1459 ;;
1460 ;; Parallel single-precision floating point conversion operations
1461 ;;
1462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1463
1464 (define_expand "fix_truncv2sfv2si2"
1465 [(set (match_operand:V2SI 0 "register_operand")
1466 (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand")))]
1467 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1468 {
1469 rtx op1 = gen_reg_rtx (V4SFmode);
1470 rtx op0 = gen_reg_rtx (V4SImode);
1471
1472 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1473
1474 emit_insn (gen_fix_truncv4sfv4si2 (op0, op1));
1475
1476 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1477 DONE;
1478 })
1479
1480 (define_expand "fixuns_truncv2sfv2si2"
1481 [(set (match_operand:V2SI 0 "register_operand")
1482 (unsigned_fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand")))]
1483 "TARGET_AVX512VL && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1484 {
1485 rtx op1 = gen_reg_rtx (V4SFmode);
1486 rtx op0 = gen_reg_rtx (V4SImode);
1487
1488 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1489
1490 emit_insn (gen_fixuns_truncv4sfv4si2 (op0, op1));
1491
1492 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1493 DONE;
1494 })
1495
1496 (define_insn "mmx_fix_truncv2sfv2si2"
1497 [(set (match_operand:V2SI 0 "register_operand" "=y")
1498 (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
1499 "TARGET_3DNOW"
1500 "pf2id\t{%1, %0|%0, %1}"
1501 [(set_attr "type" "mmxcvt")
1502 (set_attr "prefix_extra" "1")
1503 (set_attr "mode" "V2SF")])
1504
1505 (define_expand "floatv2siv2sf2"
1506 [(set (match_operand:V2SF 0 "register_operand")
1507 (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand")))]
1508 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1509 {
1510 rtx op1 = gen_reg_rtx (V4SImode);
1511 rtx op0 = gen_reg_rtx (V4SFmode);
1512
1513 emit_insn (gen_movq_v2si_to_sse (op1, operands[1]));
1514
1515 emit_insn (gen_floatv4siv4sf2 (op0, op1));
1516
1517 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1518 DONE;
1519 })
1520
1521 (define_expand "floatunsv2siv2sf2"
1522 [(set (match_operand:V2SF 0 "register_operand")
1523 (unsigned_float:V2SF (match_operand:V2SI 1 "nonimmediate_operand")))]
1524 "TARGET_AVX512VL && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1525 {
1526 rtx op1 = gen_reg_rtx (V4SImode);
1527 rtx op0 = gen_reg_rtx (V4SFmode);
1528
1529 emit_insn (gen_movq_v2si_to_sse (op1, operands[1]));
1530
1531 emit_insn (gen_floatunsv4siv4sf2 (op0, op1));
1532
1533 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1534 DONE;
1535 })
1536
1537 (define_insn "mmx_floatv2siv2sf2"
1538 [(set (match_operand:V2SF 0 "register_operand" "=y")
1539 (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
1540 "TARGET_3DNOW"
1541 "pi2fd\t{%1, %0|%0, %1}"
1542 [(set_attr "type" "mmxcvt")
1543 (set_attr "prefix_extra" "1")
1544 (set_attr "mode" "V2SF")])
1545
1546 (define_insn "mmx_pf2iw"
1547 [(set (match_operand:V2SI 0 "register_operand" "=y")
1548 (sign_extend:V2SI
1549 (ss_truncate:V2HI
1550 (fix:V2SI
1551 (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
1552 "TARGET_3DNOW_A"
1553 "pf2iw\t{%1, %0|%0, %1}"
1554 [(set_attr "type" "mmxcvt")
1555 (set_attr "prefix_extra" "1")
1556 (set_attr "mode" "V2SF")])
1557
1558 (define_insn "mmx_pi2fw"
1559 [(set (match_operand:V2SF 0 "register_operand" "=y")
1560 (float:V2SF
1561 (sign_extend:V2SI
1562 (truncate:V2HI
1563 (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))]
1564 "TARGET_3DNOW_A"
1565 "pi2fw\t{%1, %0|%0, %1}"
1566 [(set_attr "type" "mmxcvt")
1567 (set_attr "prefix_extra" "1")
1568 (set_attr "mode" "V2SF")])
1569
1570 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1571 ;;
1572 ;; Parallel single-precision floating point element swizzling
1573 ;;
1574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1575
1576 (define_insn "mmx_pswapdv2sf2"
1577 [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
1578 (vec_select:V2SF
1579 (match_operand:V2SF 1 "register_mmxmem_operand" "ym,0,Yv")
1580 (parallel [(const_int 1) (const_int 0)])))]
1581 "TARGET_3DNOW_A || TARGET_MMX_WITH_SSE"
1582 "@
1583 pswapd\t{%1, %0|%0, %1}
1584 shufps\t{$0xe1, %1, %0|%0, %1, 0xe1}
1585 vshufps\t{$0xe1, %1, %1, %0|%0, %1, %1, 0xe1}"
1586 [(set_attr "isa" "*,sse_noavx,avx")
1587 (set_attr "mmx_isa" "native,*,*")
1588 (set_attr "type" "mmxcvt,ssemov,ssemov")
1589 (set_attr "prefix_extra" "1,*,*")
1590 (set_attr "mode" "V2SF,V4SF,V4SF")])
1591
1592 (define_insn "*mmx_movshdup"
1593 [(set (match_operand:V2SF 0 "register_operand" "=v,x")
1594 (vec_select:V2SF
1595 (match_operand:V2SF 1 "register_operand" "v,0")
1596 (parallel [(const_int 1) (const_int 1)])))]
1597 "TARGET_MMX_WITH_SSE"
1598 "@
1599 %vmovshdup\t{%1, %0|%0, %1}
1600 shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}"
1601 [(set_attr "isa" "sse3,*")
1602 (set_attr "type" "sse,sseshuf1")
1603 (set_attr "length_immediate" "*,1")
1604 (set_attr "prefix_rep" "1,*")
1605 (set_attr "prefix" "maybe_vex,orig")
1606 (set_attr "mode" "V4SF")])
1607
1608 (define_insn "*mmx_movsldup"
1609 [(set (match_operand:V2SF 0 "register_operand" "=v,x")
1610 (vec_select:V2SF
1611 (match_operand:V2SF 1 "register_operand" "v,0")
1612 (parallel [(const_int 0) (const_int 0)])))]
1613 "TARGET_MMX_WITH_SSE"
1614 "@
1615 %vmovsldup\t{%1, %0|%0, %1}
1616 shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
1617 [(set_attr "isa" "sse3,*")
1618 (set_attr "type" "sse,sseshuf1")
1619 (set_attr "length_immediate" "*,1")
1620 (set_attr "prefix_rep" "1,*")
1621 (set_attr "prefix" "maybe_vex,orig")
1622 (set_attr "mode" "V4SF")])
1623
1624 (define_insn_and_split "*vec_interleave_lowv2sf"
1625 [(set (match_operand:V2SF 0 "register_operand" "=x,v")
1626 (vec_select:V2SF
1627 (vec_concat:V4SF
1628 (match_operand:V2SF 1 "register_operand" "0,v")
1629 (match_operand:V2SF 2 "register_operand" "x,v"))
1630 (parallel [(const_int 0) (const_int 2)])))]
1631 "TARGET_MMX_WITH_SSE"
1632 "#"
1633 "&& reload_completed"
1634 [(const_int 0)]
1635 "ix86_split_mmx_punpck (operands, false); DONE;"
1636 [(set_attr "isa" "noavx,avx")
1637 (set_attr "type" "sselog")
1638 (set_attr "prefix" "orig,maybe_evex")
1639 (set_attr "mode" "V4SF")])
1640
1641 (define_insn_and_split "*vec_interleave_highv2sf"
1642 [(set (match_operand:V2SF 0 "register_operand" "=x,v")
1643 (vec_select:V2SF
1644 (vec_concat:V4SF
1645 (match_operand:V2SF 1 "register_operand" "0,v")
1646 (match_operand:V2SF 2 "register_operand" "x,v"))
1647 (parallel [(const_int 1) (const_int 3)])))]
1648 "TARGET_MMX_WITH_SSE"
1649 "#"
1650 "&& reload_completed"
1651 [(const_int 0)]
1652 "ix86_split_mmx_punpck (operands, true); DONE;"
1653 [(set_attr "isa" "noavx,avx")
1654 (set_attr "type" "sselog")
1655 (set_attr "prefix" "orig,vex")
1656 (set_attr "mode" "V4SF")])
1657
1658 (define_insn "*vec_dupv2sf"
1659 [(set (match_operand:V2SF 0 "register_operand" "=y,Yv,x")
1660 (vec_duplicate:V2SF
1661 (match_operand:SF 1 "register_operand" "0,Yv,0")))]
1662 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1663 "@
1664 punpckldq\t%0, %0
1665 %vmovsldup\t{%1, %0|%0, %1}
1666 shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
1667 [(set_attr "isa" "*,sse3,sse_noavx")
1668 (set_attr "mmx_isa" "native,*,*")
1669 (set_attr "type" "mmxcvt,sse,sseshuf1")
1670 (set_attr "length_immediate" "*,*,1")
1671 (set_attr "prefix_rep" "*,1,*")
1672 (set_attr "prefix" "*,maybe_vex,orig")
1673 (set_attr "mode" "DI,V4SF,V4SF")])
1674
1675 (define_insn "*mmx_movss_<mode>"
1676 [(set (match_operand:V2FI 0 "register_operand" "=x,v")
1677 (vec_merge:V2FI
1678 (match_operand:V2FI 2 "register_operand" " x,v")
1679 (match_operand:V2FI 1 "register_operand" " 0,v")
1680 (const_int 1)))]
1681 "TARGET_MMX_WITH_SSE"
1682 "@
1683 movss\t{%2, %0|%0, %2}
1684 vmovss\t{%2, %1, %0|%0, %1, %2}"
1685 [(set_attr "isa" "noavx,avx")
1686 (set_attr "type" "ssemov")
1687 (set_attr "prefix" "orig,maybe_evex")
1688 (set_attr "mode" "SF")])
1689
1690 (define_insn "*mmx_concatv2sf"
1691 [(set (match_operand:V2SF 0 "register_operand" "=y,y")
1692 (vec_concat:V2SF
1693 (match_operand:SF 1 "nonimmediate_operand" " 0,rm")
1694 (match_operand:SF 2 "nonimm_or_0_operand" "ym,C")))]
1695 "TARGET_MMX && !TARGET_SSE"
1696 "@
1697 punpckldq\t{%2, %0|%0, %2}
1698 movd\t{%1, %0|%0, %1}"
1699 [(set_attr "type" "mmxcvt,mmxmov")
1700 (set_attr "mode" "DI")])
1701
1702 (define_expand "vec_setv2sf"
1703 [(match_operand:V2SF 0 "register_operand")
1704 (match_operand:SF 1 "register_operand")
1705 (match_operand 2 "vec_setm_mmx_operand")]
1706 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1707 {
1708 if (CONST_INT_P (operands[2]))
1709 ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
1710 INTVAL (operands[2]));
1711 else
1712 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
1713 DONE;
1714 })
1715
1716 ;; Avoid combining registers from different units in a single alternative;
1717 ;; see the comment above the inline_secondary_memory_needed function in i386.cc.
1718 (define_insn_and_split "*vec_extractv2sf_0"
1719 [(set (match_operand:SF 0 "nonimmediate_operand" "=x, m,y ,m,f,r")
1720 (vec_select:SF
1721 (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
1722 (parallel [(const_int 0)])))]
1723 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1724 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1725 "#"
1726 "&& reload_completed"
1727 [(set (match_dup 0) (match_dup 1))]
1728 "operands[1] = gen_lowpart (SFmode, operands[1]);"
1729 [(set_attr "mmx_isa" "*,*,native,native,*,*")])
1730
1731 ;; Avoid combining registers from different units in a single alternative;
1732 ;; see the comment above the inline_secondary_memory_needed function in i386.cc.
1733 (define_insn "*vec_extractv2sf_1"
1734 [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r")
1735 (vec_select:SF
1736 (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,0,o,o,o,o")
1737 (parallel [(const_int 1)])))]
1738 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1739 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1740 "@
1741 punpckhdq\t%0, %0
1742 %vmovshdup\t{%1, %0|%0, %1}
1743 shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}
1744 #
1745 #
1746 #
1747 #"
1748 [(set_attr "isa" "*,sse3,noavx,*,*,*,*")
1749 (set_attr "mmx_isa" "native,*,*,native,*,*,*")
1750 (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
1751 (set (attr "length_immediate")
1752 (if_then_else (eq_attr "alternative" "2")
1753 (const_string "1")
1754 (const_string "*")))
1755 (set (attr "prefix_rep")
1756 (if_then_else (eq_attr "alternative" "1")
1757 (const_string "1")
1758 (const_string "*")))
1759 (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig")
1760 (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")])
1761
1762 (define_split
1763 [(set (match_operand:SF 0 "register_operand")
1764 (vec_select:SF
1765 (match_operand:V2SF 1 "memory_operand")
1766 (parallel [(const_int 1)])))]
1767 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
1768 [(set (match_dup 0) (match_dup 1))]
1769 "operands[1] = adjust_address (operands[1], SFmode, 4);")
1770
1771 (define_expand "vec_extractv2sfsf"
1772 [(match_operand:SF 0 "register_operand")
1773 (match_operand:V2SF 1 "register_operand")
1774 (match_operand 2 "const_int_operand")]
1775 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1776 {
1777 ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
1778 operands[1], INTVAL (operands[2]));
1779 DONE;
1780 })
1781
1782 (define_expand "vec_initv2sfsf"
1783 [(match_operand:V2SF 0 "register_operand")
1784 (match_operand 1)]
1785 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
1786 {
1787 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
1788 operands[1]);
1789 DONE;
1790 })
1791
1792 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1793 ;;
1794 ;; Parallel single-precision floating point rounding operations.
1795 ;;
1796 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
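;; Each expander below moves the V2SF input into the low half of an SSE
;; register with movq_v2sf_to_sse, performs the operation on the full V4SF
;; vector (V4SI for the lrint/lceil/lfloor/lround forms), and copies the
;; low 64 bits of the result back out with lowpart_subreg.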
1797
1798 (define_expand "nearbyintv2sf2"
1799 [(match_operand:V2SF 0 "register_operand")
1800 (match_operand:V2SF 1 "nonimmediate_operand")]
1801 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1802 {
1803 rtx op1 = gen_reg_rtx (V4SFmode);
1804 rtx op0 = gen_reg_rtx (V4SFmode);
1805
1806 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1807
1808 emit_insn (gen_nearbyintv4sf2 (op0, op1));
1809
1810 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1811 DONE;
1812 })
1813
1814 (define_expand "rintv2sf2"
1815 [(match_operand:V2SF 0 "register_operand")
1816 (match_operand:V2SF 1 "nonimmediate_operand")]
1817 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1818 {
1819 rtx op1 = gen_reg_rtx (V4SFmode);
1820 rtx op0 = gen_reg_rtx (V4SFmode);
1821
1822 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1823
1824 emit_insn (gen_rintv4sf2 (op0, op1));
1825
1826 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1827 DONE;
1828 })
1829
1830 (define_expand "lrintv2sfv2si2"
1831 [(match_operand:V2SI 0 "register_operand")
1832 (match_operand:V2SF 1 "nonimmediate_operand")]
1833 "TARGET_SSE4_1 && !flag_trapping_math
1834 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1835 {
1836 rtx op1 = gen_reg_rtx (V4SFmode);
1837 rtx op0 = gen_reg_rtx (V4SImode);
1838
1839 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1840
1841 emit_insn (gen_lrintv4sfv4si2 (op0, op1));
1842
1843 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1844 DONE;
1845 })
1846
1847 (define_expand "ceilv2sf2"
1848 [(match_operand:V2SF 0 "register_operand")
1849 (match_operand:V2SF 1 "nonimmediate_operand")]
1850 "TARGET_SSE4_1 && !flag_trapping_math
1851 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1852 {
1853 rtx op1 = gen_reg_rtx (V4SFmode);
1854 rtx op0 = gen_reg_rtx (V4SFmode);
1855
1856 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1857
1858 emit_insn (gen_ceilv4sf2 (op0, op1));
1859
1860 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1861 DONE;
1862 })
1863
1864 (define_expand "lceilv2sfv2si2"
1865 [(match_operand:V2SI 0 "register_operand")
1866 (match_operand:V2SF 1 "nonimmediate_operand")]
1867 "TARGET_SSE4_1 && !flag_trapping_math
1868 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1869 {
1870 rtx op1 = gen_reg_rtx (V4SFmode);
1871 rtx op0 = gen_reg_rtx (V4SImode);
1872
1873 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1874
1875 emit_insn (gen_lceilv4sfv4si2 (op0, op1));
1876
1877 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1878 DONE;
1879 })
1880
1881 (define_expand "floorv2sf2"
1882 [(match_operand:V2SF 0 "register_operand")
1883 (match_operand:V2SF 1 "nonimmediate_operand")]
1884 "TARGET_SSE4_1 && !flag_trapping_math
1885 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1886 {
1887 rtx op1 = gen_reg_rtx (V4SFmode);
1888 rtx op0 = gen_reg_rtx (V4SFmode);
1889
1890 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1891
1892 emit_insn (gen_floorv4sf2 (op0, op1));
1893
1894 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1895 DONE;
1896 })
1897
1898 (define_expand "lfloorv2sfv2si2"
1899 [(match_operand:V2SI 0 "register_operand")
1900 (match_operand:V2SF 1 "nonimmediate_operand")]
1901 "TARGET_SSE4_1 && !flag_trapping_math
1902 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1903 {
1904 rtx op1 = gen_reg_rtx (V4SFmode);
1905 rtx op0 = gen_reg_rtx (V4SImode);
1906
1907 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1908
1909 emit_insn (gen_lfloorv4sfv4si2 (op0, op1));
1910
1911 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1912 DONE;
1913 })
1914
1915 (define_expand "btruncv2sf2"
1916 [(match_operand:V2SF 0 "register_operand")
1917 (match_operand:V2SF 1 "nonimmediate_operand")]
1918 "TARGET_SSE4_1 && !flag_trapping_math
1919 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1920 {
1921 rtx op1 = gen_reg_rtx (V4SFmode);
1922 rtx op0 = gen_reg_rtx (V4SFmode);
1923
1924 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1925
1926 emit_insn (gen_btruncv4sf2 (op0, op1));
1927
1928 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1929 DONE;
1930 })
1931
1932 (define_expand "roundv2sf2"
1933 [(match_operand:V2SF 0 "register_operand")
1934 (match_operand:V2SF 1 "nonimmediate_operand")]
1935 "TARGET_SSE4_1 && !flag_trapping_math
1936 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1937 {
1938 rtx op1 = gen_reg_rtx (V4SFmode);
1939 rtx op0 = gen_reg_rtx (V4SFmode);
1940
1941 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1942
1943 emit_insn (gen_roundv4sf2 (op0, op1));
1944
1945 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1946 DONE;
1947 })
1948
1949 (define_expand "lroundv2sfv2si2"
1950 [(match_operand:V2SI 0 "register_operand")
1951 (match_operand:V2SF 1 "nonimmediate_operand")]
1952 "TARGET_SSE4_1 && !flag_trapping_math
1953 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1954 {
1955 rtx op1 = gen_reg_rtx (V4SFmode);
1956 rtx op0 = gen_reg_rtx (V4SImode);
1957
1958 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1959
1960 emit_insn (gen_lroundv4sfv4si2 (op0, op1));
1961
1962 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1963 DONE;
1964 })
1965
1966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1967 ;;
1968 ;; Parallel half-precision floating point arithmetic
1969 ;;
1970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
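;; VHF_32_64 covers V2HF always and V4HF only when the 64-bit vector lives
;; in SSE registers (TARGET_MMX_WITH_SSE).  The expanders below widen their
;; operands to V8HFmode, use the AVX512FP16/AVX512VL patterns from sse.md,
;; and return the lowpart of the V8HF result.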
1971
1972 (define_mode_iterator VHF_32_64 [V2HF (V4HF "TARGET_MMX_WITH_SSE")])
1973
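;; For division the V4HF divisor is widened by concatenating a vector of
;; 1.0s into its upper half, so the unused lanes of the V8HF division stay
;; well defined.  divv2hf3 below does the same through movd_v2hf_to_sse_reg
;; with a CONST1 V8HF merge background.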
1974 (define_expand "divv4hf3"
1975 [(set (match_operand:V4HF 0 "register_operand")
1976 (div:V4HF
1977 (match_operand:V4HF 1 "nonimmediate_operand")
1978 (match_operand:V4HF 2 "register_operand")))]
1979 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
1980 {
1981 rtx op2 = gen_reg_rtx (V8HFmode);
1982 rtx op1 = gen_reg_rtx (V8HFmode);
1983 rtx op0 = gen_reg_rtx (V8HFmode);
1984
1985 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
1986 rtx tmp = gen_rtx_VEC_CONCAT (V8HFmode, operands[2],
1987 force_reg (V4HFmode, CONST1_RTX (V4HFmode)));
1988 emit_insn (gen_rtx_SET (op2, tmp));
1989 emit_insn (gen_divv8hf3 (op0, op1, op2));
1990 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
1991 DONE;
1992 })
1993
1994 (define_mode_attr mov_to_sse_suffix
1995 [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
1996
1997 (define_mode_attr mmxxmmmode
1998 [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
1999 (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
2000
2001 (define_mode_attr mmxxmmmodelower
2002 [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
2003 (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
2004
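;; Load a 32-bit vector into the low 32 bits of an SSE register.  With
;; -fno-trapping-math a plain lowpart subreg is emitted and the upper lanes
;; are left undefined; otherwise the upper lanes are zeroed (or, in the
;; _reg variant, taken from operand 2).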
2005 (define_expand "movd_<mode>_to_sse"
2006 [(set (match_operand:<mmxxmmmode> 0 "register_operand")
2007 (vec_merge:<mmxxmmmode>
2008 (vec_duplicate:<mmxxmmmode>
2009 (match_operand:V2FI_32 1 "nonimmediate_operand"))
2010 (match_dup 2)
2011 (const_int 3)))]
2012 "TARGET_SSE"
2013 {
2014 if (!flag_trapping_math)
2015 {
2016 rtx op1 = force_reg (<MODE>mode, operands[1]);
2017 emit_move_insn (operands[0],
2018 lowpart_subreg (<mmxxmmmode>mode, op1, <MODE>mode));
2019 DONE;
2020 }
2021 operands[2] = CONST0_RTX (<mmxxmmmode>mode);
2022 })
2023
2024 (define_expand "movd_<mode>_to_sse_reg"
2025 [(set (match_operand:<mmxxmmmode> 0 "register_operand")
2026 (vec_merge:<mmxxmmmode>
2027 (vec_duplicate:<mmxxmmmode>
2028 (match_operand:V2FI_32 1 "nonimmediate_operand"))
2029 (match_operand:<mmxxmmmode> 2 "register_operand")
2030 (const_int 3)))]
2031 "TARGET_SSE")
2032
2033 (define_expand "<insn><mode>3"
2034 [(set (match_operand:VHF_32_64 0 "register_operand")
2035 (plusminusmult:VHF_32_64
2036 (match_operand:VHF_32_64 1 "nonimmediate_operand")
2037 (match_operand:VHF_32_64 2 "nonimmediate_operand")))]
2038 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2039 {
2040 rtx op2 = gen_reg_rtx (V8HFmode);
2041 rtx op1 = gen_reg_rtx (V8HFmode);
2042 rtx op0 = gen_reg_rtx (V8HFmode);
2043
2044 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op2, operands[2]));
2045 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2046 emit_insn (gen_<insn>v8hf3 (op0, op1, op2));
2047
2048 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2049 DONE;
2050 })
2051
2052 (define_expand "divv2hf3"
2053 [(set (match_operand:V2HF 0 "register_operand")
2054 (div:V2HF
2055 (match_operand:V2HF 1 "nonimmediate_operand")
2056 (match_operand:V2HF 2 "nonimmediate_operand")))]
2057 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2058 {
2059 rtx op2 = gen_reg_rtx (V8HFmode);
2060 rtx op1 = gen_reg_rtx (V8HFmode);
2061 rtx op0 = gen_reg_rtx (V8HFmode);
2062
2063 emit_insn (gen_movd_v2hf_to_sse_reg (op2, operands[2],
2064 force_reg (V8HFmode, CONST1_RTX (V8HFmode))));
2065 emit_insn (gen_movd_v2hf_to_sse (op1, operands[1]));
2066 emit_insn (gen_divv8hf3 (op0, op1, op2));
2067
2068 emit_move_insn (operands[0], lowpart_subreg (V2HFmode, op0, V8HFmode));
2069 DONE;
2070 })
2071
2072 (define_expand "<code><mode>3"
2073 [(set (match_operand:VHF_32_64 0 "register_operand")
2074 (smaxmin:VHF_32_64
2075 (match_operand:VHF_32_64 1 "nonimmediate_operand")
2076 (match_operand:VHF_32_64 2 "nonimmediate_operand")))]
2077 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2078 {
2079 rtx op2 = gen_reg_rtx (V8HFmode);
2080 rtx op1 = gen_reg_rtx (V8HFmode);
2081 rtx op0 = gen_reg_rtx (V8HFmode);
2082
2083 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op2, operands[2]));
2084 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2085
2086 emit_insn (gen_<code>v8hf3 (op0, op1, op2));
2087
2088 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2089 DONE;
2090 })
2091
2092 (define_expand "sqrt<mode>2"
2093 [(set (match_operand:VHF_32_64 0 "register_operand")
2094 (sqrt:VHF_32_64
2095 (match_operand:VHF_32_64 1 "nonimmediate_operand")))]
2096 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2097 {
2098 rtx op1 = gen_reg_rtx (V8HFmode);
2099 rtx op0 = gen_reg_rtx (V8HFmode);
2100
2101 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2102 emit_insn (gen_sqrtv8hf2 (op0, op1));
2103 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2104 DONE;
2105 })
2106
2107 (define_expand "<code><mode>2"
2108 [(set (match_operand:VHF_32_64 0 "register_operand")
2109 (absneg:VHF_32_64
2110 (match_operand:VHF_32_64 1 "register_operand")))]
2111 "TARGET_SSE"
2112 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
2113
2114 (define_insn_and_split "*mmx_<code><mode>"
2115 [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
2116 (absneg:VHF_32_64
2117 (match_operand:VHF_32_64 1 "register_operand" "0,x,x")))
2118 (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
2119 "TARGET_SSE"
2120 "#"
2121 "&& reload_completed"
2122 [(set (match_dup 0)
2123 (<absneg_op>:<MODE> (match_dup 1) (match_dup 2)))]
2124 {
2125 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
2126 std::swap (operands[1], operands[2]);
2127 }
2128 [(set_attr "isa" "noavx,noavx,avx")])
2129
2130 (define_insn_and_split "*mmx_nabs<mode>2"
2131 [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
2132 (neg:VHF_32_64
2133 (abs:VHF_32_64
2134 (match_operand:VHF_32_64 1 "register_operand" "0,x,x"))))
2135 (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
2136 "TARGET_SSE"
2137 "#"
2138 "&& reload_completed"
2139 [(set (match_dup 0)
2140 (ior:<MODE> (match_dup 1) (match_dup 2)))])
2141
2142 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2143 ;;
2144 ;; Parallel half-precision floating point comparisons
2145 ;;
2146 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
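;; Comparisons use the same widening scheme: both operands are moved into
;; V8HF registers and the V8HF compare produces a QImode mask.  The
;; vcond_mask expanders either blend with a same-sized vector mask via
;; ix86_expand_sse_movcc or, for QImode masks, defer to the 128-bit
;; vcond_mask_*qi patterns and return the lowpart of the result.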
2147
2148 (define_expand "vec_cmpv4hfqi"
2149 [(set (match_operand:QI 0 "register_operand")
2150 (match_operator:QI 1 ""
2151 [(match_operand:V4HF 2 "nonimmediate_operand")
2152 (match_operand:V4HF 3 "nonimmediate_operand")]))]
2153 "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
2154 && ix86_partial_vec_fp_math"
2155 {
2156 rtx ops[4];
2157 ops[3] = gen_reg_rtx (V8HFmode);
2158 ops[2] = gen_reg_rtx (V8HFmode);
2159
2160 emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
2161 emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
2162 emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
2163 DONE;
2164 })
2165
2166 (define_expand "vcond_mask_<mode>v4hi"
2167 [(set (match_operand:V4F_64 0 "register_operand")
2168 (vec_merge:V4F_64
2169 (match_operand:V4F_64 1 "register_operand")
2170 (match_operand:V4F_64 2 "register_operand")
2171 (match_operand:V4HI 3 "register_operand")))]
2172 "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
2173 {
2174 ix86_expand_sse_movcc (operands[0], operands[3],
2175 operands[1], operands[2]);
2176 DONE;
2177 })
2178
2179 (define_expand "vcond_mask_<mode>qi"
2180 [(set (match_operand:V4FI_64 0 "register_operand")
2181 (vec_merge:V4FI_64
2182 (match_operand:V4FI_64 1 "register_operand")
2183 (match_operand:V4FI_64 2 "register_operand")
2184 (match_operand:QI 3 "register_operand")))]
2185 "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
2186 {
2187 rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
2188 operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
2189 operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
2190 emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
2191 operands[2], operands[3]));
2192 emit_move_insn (operands[0],
2193 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
2194 DONE;
2195 })
2196
2197 (define_expand "vec_cmpv2hfqi"
2198 [(set (match_operand:QI 0 "register_operand")
2199 (match_operator:QI 1 ""
2200 [(match_operand:V2HF 2 "nonimmediate_operand")
2201 (match_operand:V2HF 3 "nonimmediate_operand")]))]
2202 "TARGET_AVX512FP16 && TARGET_AVX512VL
2203 && ix86_partial_vec_fp_math"
2204 {
2205 rtx ops[4];
2206 ops[3] = gen_reg_rtx (V8HFmode);
2207 ops[2] = gen_reg_rtx (V8HFmode);
2208
2209 emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
2210 emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
2211 emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
2212 DONE;
2213 })
2214
2215 (define_expand "vcond_mask_<mode>v2hi"
2216 [(set (match_operand:V2F_32 0 "register_operand")
2217 (vec_merge:V2F_32
2218 (match_operand:V2F_32 1 "register_operand")
2219 (match_operand:V2F_32 2 "register_operand")
2220 (match_operand:V2HI 3 "register_operand")))]
2221 "TARGET_SSE4_1"
2222 {
2223 ix86_expand_sse_movcc (operands[0], operands[3],
2224 operands[1], operands[2]);
2225 DONE;
2226 })
2227
2228 (define_expand "vcond_mask_<mode>qi"
2229 [(set (match_operand:V2FI_32 0 "register_operand")
2230 (vec_merge:V2FI_32
2231 (match_operand:V2FI_32 1 "register_operand")
2232 (match_operand:V2FI_32 2 "register_operand")
2233 (match_operand:QI 3 "register_operand")))]
2234 "TARGET_AVX512BW && TARGET_AVX512VL"
2235 {
2236 rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
2237 operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
2238 operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
2239 emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
2240 operands[2], operands[3]));
2241 emit_move_insn (operands[0],
2242 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
2243 DONE;
2244 })
2245
2246 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2247 ;;
2248 ;; Parallel half-precision floating point rounding operations.
2249 ;;
2250 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
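;; Same widening scheme as the V2SF rounding expanders above: move to
;; V8HFmode, round there, return the lowpart.  nearbyint and rint only
;; require the AVX512FP16/AVX512VL and ix86_partial_vec_fp_math checks;
;; the remaining expanders additionally require -fno-trapping-math.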
2251
2252 (define_expand "btrunc<mode>2"
2253 [(match_operand:VHF_32_64 0 "register_operand")
2254 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2255 "TARGET_AVX512FP16 && TARGET_AVX512VL
2256 && ix86_partial_vec_fp_math
2257 && !flag_trapping_math"
2258 {
2259 rtx op1 = gen_reg_rtx (V8HFmode);
2260 rtx op0 = gen_reg_rtx (V8HFmode);
2261
2262 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2263 emit_insn (gen_btruncv8hf2 (op0, op1));
2264 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2265
2266 DONE;
2267 })
2268
2269 (define_expand "nearbyint<mode>2"
2270 [(match_operand:VHF_32_64 0 "register_operand")
2271 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2272 "TARGET_AVX512FP16 && TARGET_AVX512VL
2273 && ix86_partial_vec_fp_math"
2274 {
2275 rtx op1 = gen_reg_rtx (V8HFmode);
2276 rtx op0 = gen_reg_rtx (V8HFmode);
2277
2278 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2279 emit_insn (gen_nearbyintv8hf2 (op0, op1));
2280 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2281
2282 DONE;
2283 })
2284
2285 (define_expand "rint<mode>2"
2286 [(match_operand:VHF_32_64 0 "register_operand")
2287 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2288 "TARGET_AVX512FP16 && TARGET_AVX512VL
2289 && ix86_partial_vec_fp_math"
2290 {
2291 rtx op1 = gen_reg_rtx (V8HFmode);
2292 rtx op0 = gen_reg_rtx (V8HFmode);
2293
2294 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2295 emit_insn (gen_rintv8hf2 (op0, op1));
2296 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2297
2298 DONE;
2299 })
2300
2301 (define_expand "lrint<mode><mmxintvecmodelower>2"
2302 [(match_operand:<mmxintvecmode> 0 "register_operand")
2303 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2304 "TARGET_AVX512FP16 && TARGET_AVX512VL
2305 && ix86_partial_vec_fp_math"
2306 {
2307 rtx op1 = gen_reg_rtx (V8HFmode);
2308 rtx op0 = gen_reg_rtx (V8HImode);
2309
2310 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2311 emit_insn (gen_lrintv8hfv8hi2 (op0, op1));
2312 emit_move_insn (operands[0], lowpart_subreg (<mmxintvecmode>mode, op0, V8HImode));
2313
2314 DONE;
2315 })
2316
2317 (define_expand "floor<mode>2"
2318 [(match_operand:VHF_32_64 0 "register_operand")
2319 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2320 "TARGET_AVX512FP16 && TARGET_AVX512VL
2321 && ix86_partial_vec_fp_math
2322 && !flag_trapping_math"
2323 {
2324 rtx op1 = gen_reg_rtx (V8HFmode);
2325 rtx op0 = gen_reg_rtx (V8HFmode);
2326
2327 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2328 emit_insn (gen_floorv8hf2 (op0, op1));
2329 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2330
2331 DONE;
2332 })
2333
2334 (define_expand "lfloor<mode><mmxintvecmodelower>2"
2335 [(match_operand:<mmxintvecmode> 0 "register_operand")
2336 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2337 "TARGET_AVX512FP16 && TARGET_AVX512VL
2338 && ix86_partial_vec_fp_math
2339 && !flag_trapping_math"
2340 {
2341 rtx op1 = gen_reg_rtx (V8HFmode);
2342 rtx op0 = gen_reg_rtx (V8HImode);
2343
2344 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2345 emit_insn (gen_lfloorv8hfv8hi2 (op0, op1));
2346 emit_move_insn (operands[0], lowpart_subreg (<mmxintvecmode>mode, op0, V8HImode));
2347
2348 DONE;
2349 })
2350
2351 (define_expand "ceil<mode>2"
2352 [(match_operand:VHF_32_64 0 "register_operand")
2353 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2354 "TARGET_AVX512FP16 && TARGET_AVX512VL
2355 && ix86_partial_vec_fp_math
2356 && !flag_trapping_math"
2357 {
2358 rtx op1 = gen_reg_rtx (V8HFmode);
2359 rtx op0 = gen_reg_rtx (V8HFmode);
2360
2361 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2362 emit_insn (gen_ceilv8hf2 (op0, op1));
2363 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2364
2365 DONE;
2366 })
2367
2368 (define_expand "lceil<mode><mmxintvecmodelower>2"
2369 [(match_operand:<mmxintvecmode> 0 "register_operand")
2370 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2371 "TARGET_AVX512FP16 && TARGET_AVX512VL
2372 && ix86_partial_vec_fp_math
2373 && !flag_trapping_math"
2374 {
2375 rtx op1 = gen_reg_rtx (V8HFmode);
2376 rtx op0 = gen_reg_rtx (V8HImode);
2377
2378 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2379 emit_insn (gen_lceilv8hfv8hi2 (op0, op1));
2380 emit_move_insn (operands[0], lowpart_subreg (<mmxintvecmode>mode, op0, V8HImode));
2381
2382 DONE;
2383 })
2384
2385 (define_expand "round<mode>2"
2386 [(match_operand:VHF_32_64 0 "register_operand")
2387 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2388 "TARGET_AVX512FP16 && TARGET_AVX512VL
2389 && ix86_partial_vec_fp_math
2390 && !flag_trapping_math"
2391 {
2392 rtx op1 = gen_reg_rtx (V8HFmode);
2393 rtx op0 = gen_reg_rtx (V8HFmode);
2394
2395 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2396 emit_insn (gen_roundv8hf2 (op0, op1));
2397 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2398
2399 DONE;
2400 })
2401
2402 (define_expand "lround<mode><mmxintvecmodelower>2"
2403 [(match_operand:<mmxintvecmode> 0 "register_operand")
2404 (match_operand:VHF_32_64 1 "nonimmediate_operand")]
2405 "TARGET_AVX512FP16 && TARGET_AVX512VL
2406 && ix86_partial_vec_fp_math
2407 && !flag_trapping_math"
2408 {
2409 rtx op1 = gen_reg_rtx (V8HFmode);
2410 rtx op0 = gen_reg_rtx (V8HImode);
2411
2412 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2413 emit_insn (gen_lroundv8hfv8hi2 (op0, op1));
2414 emit_move_insn (operands[0], lowpart_subreg (<mmxintvecmode>mode, op0, V8HImode));
2415
2416 DONE;
2417 })
2418
2419 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420 ;;
2421 ;; Parallel half-precision floating point logical operations
2422 ;;
2423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
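;; These are plain bitwise operations, so only TARGET_SSE is needed and the
;; full-width andnps/andps/orps/xorps forms can be used directly.  copysign
;; and xorsign build the sign-bit mask with ix86_build_signbit_mask;
;; signbit shifts the bit pattern right by (element size - 1) in the
;; corresponding integer vector mode.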
2424
2425 (define_insn "*mmx_andnot<mode>3"
2426 [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x")
2427 (and:VHF_32_64
2428 (not:VHF_32_64
2429 (match_operand:VHF_32_64 1 "register_operand" "0,x"))
2430 (match_operand:VHF_32_64 2 "register_operand" "x,x")))]
2431 "TARGET_SSE"
2432 "@
2433 andnps\t{%2, %0|%0, %2}
2434 vandnps\t{%2, %1, %0|%0, %1, %2}"
2435 [(set_attr "isa" "noavx,avx")
2436 (set_attr "type" "sselog")
2437 (set_attr "prefix" "orig,vex")
2438 (set_attr "mode" "V4SF")])
2439
2440 (define_insn "<code><mode>3"
2441 [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x")
2442 (any_logic:VHF_32_64
2443 (match_operand:VHF_32_64 1 "register_operand" "%0,x")
2444 (match_operand:VHF_32_64 2 "register_operand" " x,x")))]
2445 "TARGET_SSE"
2446 "@
2447 <logic>ps\t{%2, %0|%0, %2}
2448 v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
2449 [(set_attr "isa" "noavx,avx")
2450 (set_attr "type" "sselog,sselog")
2451 (set_attr "prefix" "orig,vex")
2452 (set_attr "mode" "V4SF")])
2453
2454 (define_expand "copysign<mode>3"
2455 [(set (match_dup 4)
2456 (and:VHF_32_64
2457 (not:VHF_32_64 (match_dup 3))
2458 (match_operand:VHF_32_64 1 "register_operand")))
2459 (set (match_dup 5)
2460 (and:VHF_32_64 (match_dup 3)
2461 (match_operand:VHF_32_64 2 "register_operand")))
2462 (set (match_operand:VHF_32_64 0 "register_operand")
2463 (ior:VHF_32_64 (match_dup 4) (match_dup 5)))]
2464 "TARGET_SSE"
2465 {
2466 operands[3] = ix86_build_signbit_mask (<MODE>mode, true, false);
2467
2468 operands[4] = gen_reg_rtx (<MODE>mode);
2469 operands[5] = gen_reg_rtx (<MODE>mode);
2470 })
2471
2472 (define_expand "xorsign<mode>3"
2473 [(set (match_dup 4)
2474 (and:VHF_32_64 (match_dup 3)
2475 (match_operand:VHF_32_64 2 "register_operand")))
2476 (set (match_operand:VHF_32_64 0 "register_operand")
2477 (xor:VHF_32_64 (match_dup 4)
2478 (match_operand:VHF_32_64 1 "register_operand")))]
2479 "TARGET_SSE"
2480 {
2481 operands[3] = ix86_build_signbit_mask (<MODE>mode, true, false);
2482
2483 operands[4] = gen_reg_rtx (<MODE>mode);
2484 })
2485
2486 (define_expand "signbit<mode>2"
2487 [(set (match_operand:<mmxintvecmode> 0 "register_operand")
2488 (lshiftrt:<mmxintvecmode>
2489 (subreg:<mmxintvecmode>
2490 (match_operand:VHF_32_64 1 "register_operand") 0)
2491 (match_dup 2)))]
2492 "TARGET_SSE2"
2493 {
2494 operands[1] = force_reg (<MODE>mode, operands[1]);
2495 operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
2496 })
2497
2498 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2499 ;;
2500 ;; Parallel half-precision FMA multiply/accumulate instructions.
2501 ;;
2502 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
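;; fma, fms, fnma and fnms widen all three operands to V8HFmode, use the
;; corresponding V8HF FMA pattern, and return the lowpart of the result;
;; the V4HF fmaddsub/fmsubadd expanders work the same way.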
2503
2504 (define_expand "fma<mode>4"
2505 [(set (match_operand:VHF_32_64 0 "register_operand")
2506 (fma:VHF_32_64
2507 (match_operand:VHF_32_64 1 "nonimmediate_operand")
2508 (match_operand:VHF_32_64 2 "nonimmediate_operand")
2509 (match_operand:VHF_32_64 3 "nonimmediate_operand")))]
2510 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2511 {
2512 rtx op3 = gen_reg_rtx (V8HFmode);
2513 rtx op2 = gen_reg_rtx (V8HFmode);
2514 rtx op1 = gen_reg_rtx (V8HFmode);
2515 rtx op0 = gen_reg_rtx (V8HFmode);
2516
2517 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op3, operands[3]));
2518 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op2, operands[2]));
2519 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2520
2521 emit_insn (gen_fmav8hf4 (op0, op1, op2, op3));
2522
2523 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2524 DONE;
2525 })
2526
2527 (define_expand "fms<mode>4"
2528 [(set (match_operand:VHF_32_64 0 "register_operand")
2529 (fma:VHF_32_64
2530 (match_operand:VHF_32_64 1 "nonimmediate_operand")
2531 (match_operand:VHF_32_64 2 "nonimmediate_operand")
2532 (neg:VHF_32_64
2533 (match_operand:VHF_32_64 3 "nonimmediate_operand"))))]
2534 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2535 {
2536 rtx op3 = gen_reg_rtx (V8HFmode);
2537 rtx op2 = gen_reg_rtx (V8HFmode);
2538 rtx op1 = gen_reg_rtx (V8HFmode);
2539 rtx op0 = gen_reg_rtx (V8HFmode);
2540
2541 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op3, operands[3]));
2542 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op2, operands[2]));
2543 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2544
2545 emit_insn (gen_fmsv8hf4 (op0, op1, op2, op3));
2546
2547 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2548 DONE;
2549 })
2550
2551 (define_expand "fnma<mode>4"
2552 [(set (match_operand:VHF_32_64 0 "register_operand")
2553 (fma:VHF_32_64
2554 (neg:VHF_32_64
2555 (match_operand:VHF_32_64 1 "nonimmediate_operand"))
2556 (match_operand:VHF_32_64 2 "nonimmediate_operand")
2557 (match_operand:VHF_32_64 3 "nonimmediate_operand")))]
2558 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2559 {
2560 rtx op3 = gen_reg_rtx (V8HFmode);
2561 rtx op2 = gen_reg_rtx (V8HFmode);
2562 rtx op1 = gen_reg_rtx (V8HFmode);
2563 rtx op0 = gen_reg_rtx (V8HFmode);
2564
2565 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op3, operands[3]));
2566 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op2, operands[2]));
2567 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2568
2569 emit_insn (gen_fnmav8hf4 (op0, op1, op2, op3));
2570
2571 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2572 DONE;
2573 })
2574
2575 (define_expand "fnms<mode>4"
2576 [(set (match_operand:VHF_32_64 0 "register_operand")
2577 (fma:VHF_32_64
2578 (neg:VHF_32_64
2579 (match_operand:VHF_32_64 1 "nonimmediate_operand"))
2580 (match_operand:VHF_32_64 2 "nonimmediate_operand")
2581 (neg:VHF_32_64
2582 (match_operand:VHF_32_64 3 "nonimmediate_operand"))))]
2583 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2584 {
2585 rtx op3 = gen_reg_rtx (V8HFmode);
2586 rtx op2 = gen_reg_rtx (V8HFmode);
2587 rtx op1 = gen_reg_rtx (V8HFmode);
2588 rtx op0 = gen_reg_rtx (V8HFmode);
2589
2590 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op3, operands[3]));
2591 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op2, operands[2]));
2592 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2593
2594 emit_insn (gen_fnmsv8hf4 (op0, op1, op2, op3));
2595
2596 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
2597 DONE;
2598 })
2599
2600 (define_expand "vec_fmaddsubv4hf4"
2601 [(match_operand:V4HF 0 "register_operand")
2602 (match_operand:V4HF 1 "nonimmediate_operand")
2603 (match_operand:V4HF 2 "nonimmediate_operand")
2604 (match_operand:V4HF 3 "nonimmediate_operand")]
2605 "TARGET_AVX512FP16 && TARGET_AVX512VL
2606 && TARGET_MMX_WITH_SSE
2607 && ix86_partial_vec_fp_math"
2608 {
2609 rtx op3 = gen_reg_rtx (V8HFmode);
2610 rtx op2 = gen_reg_rtx (V8HFmode);
2611 rtx op1 = gen_reg_rtx (V8HFmode);
2612 rtx op0 = gen_reg_rtx (V8HFmode);
2613
2614 emit_insn (gen_movq_v4hf_to_sse (op3, operands[3]));
2615 emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
2616 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
2617
2618 emit_insn (gen_vec_fmaddsubv8hf4 (op0, op1, op2, op3));
2619
2620 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
2621 DONE;
2622 })
2623
2624 (define_expand "vec_fmsubaddv4hf4"
2625 [(match_operand:V4HF 0 "register_operand")
2626 (match_operand:V4HF 1 "nonimmediate_operand")
2627 (match_operand:V4HF 2 "nonimmediate_operand")
2628 (match_operand:V4HF 3 "nonimmediate_operand")]
2629 "TARGET_AVX512FP16 && TARGET_AVX512VL
2630 && ix86_partial_vec_fp_math
2631 && TARGET_MMX_WITH_SSE"
2632 {
2633 rtx op3 = gen_reg_rtx (V8HFmode);
2634 rtx op2 = gen_reg_rtx (V8HFmode);
2635 rtx op1 = gen_reg_rtx (V8HFmode);
2636 rtx op0 = gen_reg_rtx (V8HFmode);
2637
2638 emit_insn (gen_movq_v4hf_to_sse (op3, operands[3]));
2639 emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
2640 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
2641
2642 emit_insn (gen_vec_fmsubaddv8hf4 (op0, op1, op2, op3));
2643
2644 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
2645 DONE;
2646 })
2647
2648 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2649 ;;
2650 ;; Parallel half-precision floating point complex type operations
2651 ;;
2652 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
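;; A V4HF vector is treated as two interleaved half-float complex values.
;; The cmla/cmul expanders (and their conjugate forms) move the operands
;; into V8HF registers and use the AVX512FP16 complex patterns from sse.md.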
2653
2654 (define_expand "cmlav4hf4"
2655 [(match_operand:V4HF 0 "register_operand")
2656 (match_operand:V4HF 1 "vector_operand")
2657 (match_operand:V4HF 2 "vector_operand")
2658 (match_operand:V4HF 3 "vector_operand")]
2659 "TARGET_AVX512FP16 && TARGET_AVX512VL"
2660 {
2661 rtx op3 = gen_reg_rtx (V8HFmode);
2662 rtx op2 = gen_reg_rtx (V8HFmode);
2663 rtx op1 = gen_reg_rtx (V8HFmode);
2664 rtx op0 = gen_reg_rtx (V8HFmode);
2665
2666 emit_insn (gen_movq_v4hf_to_sse (op3, operands[3]));
2667 emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
2668 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
2669
2670 emit_insn (gen_cmlav8hf4 (op0, op1, op2, op3));
2671
2672 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
2673 DONE;
2674 })
2675
2676 (define_expand "cmla_conjv4hf4"
2677 [(match_operand:V4HF 0 "register_operand")
2678 (match_operand:V4HF 1 "vector_operand")
2679 (match_operand:V4HF 2 "vector_operand")
2680 (match_operand:V4HF 3 "vector_operand")]
2681 "TARGET_AVX512FP16 && TARGET_AVX512VL"
2682 {
2683 rtx op3 = gen_reg_rtx (V8HFmode);
2684 rtx op2 = gen_reg_rtx (V8HFmode);
2685 rtx op1 = gen_reg_rtx (V8HFmode);
2686 rtx op0 = gen_reg_rtx (V8HFmode);
2687
2688 emit_insn (gen_movq_v4hf_to_sse (op3, operands[3]));
2689 emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
2690 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
2691
2692 emit_insn (gen_cmla_conjv8hf4 (op0, op1, op2, op3));
2693
2694 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
2695 DONE;
2696 })
2697
2698 (define_expand "cmulv4hf3"
2699 [(match_operand:V4HF 0 "register_operand")
2700 (match_operand:V4HF 1 "vector_operand")
2701 (match_operand:V4HF 2 "vector_operand")]
2702 "TARGET_AVX512FP16 && TARGET_AVX512VL"
2703 {
2704 rtx op2 = gen_reg_rtx (V8HFmode);
2705 rtx op1 = gen_reg_rtx (V8HFmode);
2706 rtx op0 = gen_reg_rtx (V8HFmode);
2707
2708 emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
2709 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
2710
2711 emit_insn (gen_cmulv8hf3 (op0, op1, op2));
2712 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
2713 DONE;
2714 })
2715
2716 (define_expand "cmul_conjv4hf3"
2717 [(match_operand:V4HF 0 "register_operand")
2718 (match_operand:V4HF 1 "vector_operand")
2719 (match_operand:V4HF 2 "vector_operand")]
2720 "TARGET_AVX512FP16 && TARGET_AVX512VL"
2721 {
2722 rtx op2 = gen_reg_rtx (V8HFmode);
2723 rtx op1 = gen_reg_rtx (V8HFmode);
2724 rtx op0 = gen_reg_rtx (V8HFmode);
2725
2726 emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
2727 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
2728
2729 emit_insn (gen_cmul_conjv8hf3 (op0, op1, op2));
2730 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
2731 DONE;
2732 })
2733
2734 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2735 ;;
2736 ;; Parallel half-precision floating point conversion operations
2737 ;;
2738 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
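;; Conversions follow the same scheme: move the partial vector into an SSE
;; register, convert with the corresponding 128-bit pattern, and take the
;; lowpart of the result in the destination mode.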
2739
2740 (define_expand "fix<fixunssuffix>_trunc<mode><mmxintvecmodelower>2"
2741 [(set (match_operand:<mmxintvecmode> 0 "register_operand")
2742 (any_fix:<mmxintvecmode>
2743 (match_operand:VHF_32_64 1 "nonimmediate_operand")))]
2744 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2745 {
2746 rtx op1 = gen_reg_rtx (V8HFmode);
2747 rtx op0 = gen_reg_rtx (V8HImode);
2748
2749 emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
2750
2751 emit_insn (gen_fix<fixunssuffix>_truncv8hfv8hi2 (op0, op1));
2752
2753 emit_move_insn (operands[0],
2754 lowpart_subreg (<mmxintvecmode>mode, op0, V8HImode));
2755 DONE;
2756 })
2757
2758 (define_expand "fix<fixunssuffix>_truncv2hfv2si2"
2759 [(set (match_operand:V2SI 0 "register_operand")
2760 (any_fix:V2SI
2761 (match_operand:V2HF 1 "nonimmediate_operand")))]
2762 "TARGET_AVX512FP16 && TARGET_AVX512VL
2763 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
2764 {
2765 rtx op1 = gen_reg_rtx (V8HFmode);
2766 rtx op0 = gen_reg_rtx (V4SImode);
2767
2768 emit_insn (gen_movd_v2hf_to_sse (op1, operands[1]));
2769
2770 emit_insn (gen_avx512fp16_fix<fixunssuffix>_truncv4si2 (op0, op1));
2771
2772 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
2773 DONE;
2774 })
2775
2776 (define_expand "float<floatunssuffix><mmxintvecmodelower><mode>2"
2777 [(set (match_operand:VHF_32_64 0 "register_operand")
2778 (any_float:VHF_32_64
2779 (match_operand:<mmxintvecmode> 1 "nonimmediate_operand")))]
2780 "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
2781 {
2782 rtx op1 = gen_reg_rtx (V8HImode);
2783 rtx op0 = gen_reg_rtx (V8HFmode);
2784
2785 rtx (*gen_movd_sse) (rtx, rtx)
2786 = gen_mov<mov_to_sse_suffix>_<mmxintvecmodelower>_to_sse;
2787 emit_insn (gen_movd_sse (op1, operands[1]));
2788
2789 emit_insn (gen_float<floatunssuffix>v8hiv8hf2 (op0, op1));
2790
2791 emit_move_insn (operands[0],
2792 lowpart_subreg (<MODE>mode, op0, V8HFmode));
2793 DONE;
2794 })
2795
2796 (define_expand "float<floatunssuffix>v2siv2hf2"
2797 [(set (match_operand:V2HF 0 "register_operand")
2798 (any_float:V2HF
2799 (match_operand:V2SI 1 "nonimmediate_operand")))]
2800 "TARGET_AVX512FP16 && TARGET_AVX512VL
2801 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
2802 {
2803 rtx op1 = gen_reg_rtx (V4SImode);
2804 rtx op0 = gen_reg_rtx (V8HFmode);
2805
2806 emit_insn (gen_movq_v2si_to_sse (op1, operands[1]));
2807
2808 emit_insn (gen_avx512fp16_float<floatunssuffix>v4siv4hf2 (op0, op1));
2809
2810 emit_move_insn (operands[0], lowpart_subreg (V2HFmode, op0, V8HFmode));
2811 DONE;
2812 })
2813
2814 (define_expand "extendv2hfv2sf2"
2815 [(set (match_operand:V2SF 0 "register_operand")
2816 (float_extend:V2SF
2817 (match_operand:V2HF 1 "nonimmediate_operand")))]
2818 "TARGET_AVX512FP16 && TARGET_AVX512VL
2819 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
2820 {
2821 rtx op1 = gen_reg_rtx (V8HFmode);
2822 rtx op0 = gen_reg_rtx (V4SFmode);
2823
2824 emit_insn (gen_movd_v2hf_to_sse (op1, operands[1]));
2825
2826 emit_insn (gen_avx512fp16_float_extend_phv4sf2 (op0, op1));
2827
2828 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
2829 DONE;
2830 })
2831
2832 (define_expand "truncv2sfv2hf2"
2833 [(set (match_operand:V2HF 0 "register_operand")
2834 (float_truncate:V2HF
2835 (match_operand:V2SF 1 "nonimmediate_operand")))]
2836 "TARGET_AVX512FP16 && TARGET_AVX512VL
2837 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
2838 {
2839 rtx op1 = gen_reg_rtx (V4SFmode);
2840 rtx op0 = gen_reg_rtx (V8HFmode);
2841
2842 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
2843
2844 emit_insn (gen_avx512fp16_truncv4sfv4hf2 (op0, op1));
2845
2846 emit_move_insn (operands[0], lowpart_subreg (V2HFmode, op0, V8HFmode));
2847 DONE;
2848 })
2849
2850 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2851 ;;
2852 ;; Parallel integral arithmetic
2853 ;;
2854 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
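;; Vector negation is open-coded as subtraction from zero.  The V2QI
;; patterns come in two forms: general-register splits that operate on the
;; low byte via strict_low_part and on the high byte via zero_extract, and
;; SSE2 splits that perform the whole operation in V16QImode.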
2855
2856 (define_expand "neg<mode>2"
2857 [(set (match_operand:MMXMODEI 0 "register_operand")
2858 (minus:MMXMODEI
2859 (match_dup 2)
2860 (match_operand:MMXMODEI 1 "register_operand")))]
2861 "TARGET_MMX_WITH_SSE"
2862 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2863
2864 (define_expand "neg<mode>2"
2865 [(set (match_operand:VI_32 0 "register_operand")
2866 (minus:VI_32
2867 (match_dup 2)
2868 (match_operand:VI_32 1 "register_operand")))]
2869 "TARGET_SSE2"
2870 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2871
2872 (define_insn "negv2qi2"
2873 [(set (match_operand:V2QI 0 "register_operand" "=?Q,&Yw")
2874 (neg:V2QI
2875 (match_operand:V2QI 1 "register_operand" "0,Yw")))
2876 (clobber (reg:CC FLAGS_REG))]
2877 "!TARGET_PARTIAL_REG_STALL || optimize_size || TARGET_SSE2"
2878 "#"
2879 [(set_attr "isa" "*,sse2")
2880 (set_attr "type" "multi")
2881 (set_attr "mode" "QI,TI")
2882 (set (attr "enabled")
2883 (cond [(and (eq_attr "alternative" "0")
2884 (and (match_test "TARGET_PARTIAL_REG_STALL")
2885 (not (match_test "optimize_function_for_size_p (cfun)"))))
2886 (symbol_ref "false")
2887 ]
2888 (const_string "*")))])
2889
2890 (define_split
2891 [(set (match_operand:V2QI 0 "general_reg_operand")
2892 (neg:V2QI
2893 (match_operand:V2QI 1 "general_reg_operand")))
2894 (clobber (reg:CC FLAGS_REG))]
2895 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
2896 && reload_completed"
2897 [(parallel
2898 [(set (strict_low_part (match_dup 0))
2899 (neg:QI (match_dup 1)))
2900 (clobber (reg:CC FLAGS_REG))])
2901 (parallel
2902 [(set (zero_extract:HI (match_dup 2) (const_int 8) (const_int 8))
2903 (subreg:HI
2904 (neg:QI
2905 (subreg:QI
2906 (zero_extract:HI (match_dup 3)
2907 (const_int 8)
2908 (const_int 8)) 0)) 0))
2909 (clobber (reg:CC FLAGS_REG))])]
2910 {
2911 operands[3] = lowpart_subreg (HImode, operands[1], V2QImode);
2912 operands[2] = lowpart_subreg (HImode, operands[0], V2QImode);
2913 operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
2914 operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
2915 })
2916
2917 (define_split
2918 [(set (match_operand:V2QI 0 "sse_reg_operand")
2919 (neg:V2QI
2920 (match_operand:V2QI 1 "sse_reg_operand")))
2921 (clobber (reg:CC FLAGS_REG))]
2922 "TARGET_SSE2 && reload_completed"
2923 [(set (match_dup 0) (match_dup 2))
2924 (set (match_dup 0)
2925 (minus:V16QI (match_dup 0) (match_dup 1)))]
2926 {
2927 operands[2] = CONST0_RTX (V16QImode);
2928 operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
2929 operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
2930 })
2931
2932 (define_expand "mmx_<insn><mode>3"
2933 [(set (match_operand:MMXMODEI8 0 "register_operand")
2934 (plusminus:MMXMODEI8
2935 (match_operand:MMXMODEI8 1 "register_mmxmem_operand")
2936 (match_operand:MMXMODEI8 2 "register_mmxmem_operand")))]
2937 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2938 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2939
2940 (define_expand "<insn><mode>3"
2941 [(set (match_operand:MMXMODEI 0 "register_operand")
2942 (plusminus:MMXMODEI
2943 (match_operand:MMXMODEI 1 "register_operand")
2944 (match_operand:MMXMODEI 2 "register_operand")))]
2945 "TARGET_MMX_WITH_SSE")
2946
2947 (define_insn "*mmx_<insn><mode>3"
2948 [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,<Yv_Yw>")
2949 (plusminus:MMXMODEI8
2950 (match_operand:MMXMODEI8 1 "register_mmxmem_operand"
2951 "<comm>0,0,<Yv_Yw>")
2952 (match_operand:MMXMODEI8 2 "register_mmxmem_operand"
2953 "ym,x,<Yv_Yw>")))]
2954 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2955 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2956 "@
2957 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2958 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2959 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2960 [(set_attr "isa" "*,sse2_noavx,avx")
2961 (set_attr "mmx_isa" "native,*,*")
2962 (set_attr "type" "mmxadd,sseadd,sseadd")
2963 (set_attr "mode" "DI,TI,TI")])
2964
2965 (define_insn "<insn><mode>3"
2966 [(set (match_operand:VI_32 0 "register_operand" "=x,Yw")
2967 (plusminus:VI_32
2968 (match_operand:VI_32 1 "register_operand" "<comm>0,Yw")
2969 (match_operand:VI_32 2 "register_operand" "x,Yw")))]
2970 "TARGET_SSE2"
2971 "@
2972 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2973 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2974 [(set_attr "isa" "noavx,avx")
2975 (set_attr "type" "sseadd")
2976 (set_attr "mode" "TI")])
2977
2978 (define_insn "<insn>v2qi3"
2979 [(set (match_operand:V2QI 0 "register_operand" "=?Q,x,Yw")
2980 (plusminus:V2QI
2981 (match_operand:V2QI 1 "register_operand" "<comm>0,0,Yw")
2982 (match_operand:V2QI 2 "register_operand" "Q,x,Yw")))
2983 (clobber (reg:CC FLAGS_REG))]
2984 "!TARGET_PARTIAL_REG_STALL || optimize_size || TARGET_SSE2"
2985 "#"
2986 [(set_attr "isa" "*,sse2_noavx,avx")
2987 (set_attr "type" "multi,sseadd,sseadd")
2988 (set_attr "mode" "QI,TI,TI")
2989 (set (attr "enabled")
2990 (cond [(and (eq_attr "alternative" "0")
2991 (and (match_test "TARGET_PARTIAL_REG_STALL")
2992 (not (match_test "optimize_function_for_size_p (cfun)"))))
2993 (symbol_ref "false")
2994 ]
2995 (const_string "*")))])
2996
2997 (define_split
2998 [(set (match_operand:V2QI 0 "general_reg_operand")
2999 (plusminus:V2QI
3000 (match_operand:V2QI 1 "general_reg_operand")
3001 (match_operand:V2QI 2 "general_reg_operand")))
3002 (clobber (reg:CC FLAGS_REG))]
3003 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
3004 && reload_completed"
3005 [(parallel
3006 [(set (strict_low_part (match_dup 0))
3007 (plusminus:QI (match_dup 1) (match_dup 2)))
3008 (clobber (reg:CC FLAGS_REG))])
3009 (parallel
3010 [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
3011 (subreg:HI
3012 (plusminus:QI
3013 (subreg:QI
3014 (zero_extract:HI (match_dup 4)
3015 (const_int 8)
3016 (const_int 8)) 0)
3017 (subreg:QI
3018 (zero_extract:HI (match_dup 5)
3019 (const_int 8)
3020 (const_int 8)) 0)) 0))
3021 (clobber (reg:CC FLAGS_REG))])]
3022 {
3023 operands[5] = lowpart_subreg (HImode, operands[2], V2QImode);
3024 operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
3025 operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
3026 operands[2] = lowpart_subreg (QImode, operands[2], V2QImode);
3027 operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
3028 operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
3029 })
3030
3031 (define_split
3032 [(set (match_operand:V2QI 0 "sse_reg_operand")
3033 (plusminus:V2QI
3034 (match_operand:V2QI 1 "sse_reg_operand")
3035 (match_operand:V2QI 2 "sse_reg_operand")))
3036 (clobber (reg:CC FLAGS_REG))]
3037 "TARGET_SSE2 && reload_completed"
3038 [(set (match_dup 0)
3039 (plusminus:V16QI (match_dup 1) (match_dup 2)))]
3040 {
3041 operands[2] = lowpart_subreg (V16QImode, operands[2], V2QImode);
3042 operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
3043 operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
3044 })
3045
3046 (define_expand "mmx_<insn><mode>3"
3047 [(set (match_operand:MMXMODE12 0 "register_operand")
3048 (sat_plusminus:MMXMODE12
3049 (match_operand:MMXMODE12 1 "register_mmxmem_operand")
3050 (match_operand:MMXMODE12 2 "register_mmxmem_operand")))]
3051 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3052 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3053
3054 (define_insn "*mmx_<insn><mode>3"
3055 [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yw")
3056 (sat_plusminus:MMXMODE12
3057 (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yw")
3058 (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yw")))]
3059 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3060 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3061 "@
3062 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
3063 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
3064 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3065 [(set_attr "isa" "*,sse2_noavx,avx")
3066 (set_attr "mmx_isa" "native,*,*")
3067 (set_attr "type" "mmxadd,sseadd,sseadd")
3068 (set_attr "mode" "DI,TI,TI")])
3069
3070 (define_insn "*<insn><mode>3"
3071 [(set (match_operand:VI_16_32 0 "register_operand" "=x,Yw")
3072 (sat_plusminus:VI_16_32
3073 (match_operand:VI_16_32 1 "register_operand" "<comm>0,Yw")
3074 (match_operand:VI_16_32 2 "register_operand" "x,Yw")))]
3075 "TARGET_SSE2"
3076 "@
3077 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
3078 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3079 [(set_attr "isa" "noavx,avx")
3080 (set_attr "type" "sseadd")
3081 (set_attr "mode" "TI")])
3082
3083 (define_insn "mulv2si3"
3084 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
3085 (mult:V2SI
3086 (match_operand:V2SI 1 "register_operand" "%0,0,v")
3087 (match_operand:V2SI 2 "register_operand" "Yr,*x,v")))]
3088 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3089 "@
3090 pmulld\t{%2, %0|%0, %2}
3091 pmulld\t{%2, %0|%0, %2}
3092 vpmulld\t{%2, %1, %0|%0, %1, %2}"
3093 [(set_attr "isa" "noavx,noavx,avx")
3094 (set_attr "type" "sseimul")
3095 (set_attr "prefix_extra" "1")
3096 (set_attr "prefix" "orig,orig,vex")
3097 (set_attr "btver2_decode" "vector")
3098 (set_attr "mode" "TI")])
3099
3100 (define_expand "mmx_mulv4hi3"
3101 [(set (match_operand:V4HI 0 "register_operand")
3102 (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
3103 (match_operand:V4HI 2 "register_mmxmem_operand")))]
3104 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3105 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
3106
3107 (define_expand "mulv4hi3"
3108 [(set (match_operand:V4HI 0 "register_operand")
3109 (mult:V4HI (match_operand:V4HI 1 "register_operand")
3110 (match_operand:V4HI 2 "register_operand")))]
3111 "TARGET_MMX_WITH_SSE")
3112
3113 (define_insn "*mmx_mulv4hi3"
3114 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
3115 (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
3116 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
3117 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3118 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
3119 "@
3120 pmullw\t{%2, %0|%0, %2}
3121 pmullw\t{%2, %0|%0, %2}
3122 vpmullw\t{%2, %1, %0|%0, %1, %2}"
3123 [(set_attr "isa" "*,sse2_noavx,avx")
3124 (set_attr "mmx_isa" "native,*,*")
3125 (set_attr "type" "mmxmul,ssemul,ssemul")
3126 (set_attr "mode" "DI,TI,TI")])
3127
3128 (define_insn "mulv2hi3"
3129 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
3130 (mult:V2HI (match_operand:V2HI 1 "register_operand" "%0,Yw")
3131 (match_operand:V2HI 2 "register_operand" "x,Yw")))]
3132 "TARGET_SSE2"
3133 "@
3134 pmullw\t{%2, %0|%0, %2}
3135 vpmullw\t{%2, %1, %0|%0, %1, %2}"
3136 [(set_attr "isa" "noavx,avx")
3137 (set_attr "type" "ssemul")
3138 (set_attr "mode" "TI")])
3139
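;; There is no vector byte multiply instruction, so V8QI and V4QI
;; multiplications are synthesized by ix86_expand_vecop_qihi_partial.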
3140 (define_expand "mulv8qi3"
3141 [(set (match_operand:V8QI 0 "register_operand")
3142 (mult:V8QI (match_operand:V8QI 1 "register_operand")
3143 (match_operand:V8QI 2 "register_operand")))]
3144 "TARGET_MMX_WITH_SSE"
3145 {
3146 ix86_expand_vecop_qihi_partial (MULT, operands[0], operands[1], operands[2]);
3147 DONE;
3148 })
3149
3150 (define_expand "mulv4qi3"
3151 [(set (match_operand:V4QI 0 "register_operand")
3152 (mult:V4QI (match_operand:V4QI 1 "register_operand")
3153 (match_operand:V4QI 2 "register_operand")))]
3154 "TARGET_SSE2"
3155 {
3156 ix86_expand_vecop_qihi_partial (MULT, operands[0], operands[1], operands[2]);
3157 DONE;
3158 })
3159
3160 (define_expand "mmx_smulv4hi3_highpart"
3161 [(set (match_operand:V4HI 0 "register_operand")
3162 (truncate:V4HI
3163 (lshiftrt:V4SI
3164 (mult:V4SI
3165 (sign_extend:V4SI
3166 (match_operand:V4HI 1 "register_mmxmem_operand"))
3167 (sign_extend:V4SI
3168 (match_operand:V4HI 2 "register_mmxmem_operand")))
3169 (const_int 16))))]
3170 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3171 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
3172
3173 (define_insn "*mmx_smulv4hi3_highpart"
3174 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
3175 (truncate:V4HI
3176 (lshiftrt:V4SI
3177 (mult:V4SI
3178 (sign_extend:V4SI
3179 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
3180 (sign_extend:V4SI
3181 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
3182 (const_int 16))))]
3183 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3184 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
3185 "@
3186 pmulhw\t{%2, %0|%0, %2}
3187 pmulhw\t{%2, %0|%0, %2}
3188 vpmulhw\t{%2, %1, %0|%0, %1, %2}"
3189 [(set_attr "isa" "*,sse2_noavx,avx")
3190 (set_attr "mmx_isa" "native,*,*")
3191 (set_attr "type" "mmxmul,ssemul,ssemul")
3192 (set_attr "mode" "DI,TI,TI")])
3193
3194 (define_expand "mmx_umulv4hi3_highpart"
3195 [(set (match_operand:V4HI 0 "register_operand")
3196 (truncate:V4HI
3197 (lshiftrt:V4SI
3198 (mult:V4SI
3199 (zero_extend:V4SI
3200 (match_operand:V4HI 1 "register_mmxmem_operand"))
3201 (zero_extend:V4SI
3202 (match_operand:V4HI 2 "register_mmxmem_operand")))
3203 (const_int 16))))]
3204 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3205 && (TARGET_SSE || TARGET_3DNOW_A)"
3206 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
3207
3208 (define_insn "*mmx_umulv4hi3_highpart"
3209 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
3210 (truncate:V4HI
3211 (lshiftrt:V4SI
3212 (mult:V4SI
3213 (zero_extend:V4SI
3214 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
3215 (zero_extend:V4SI
3216 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
3217 (const_int 16))))]
3218 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3219 && (TARGET_SSE || TARGET_3DNOW_A)
3220 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
3221 "@
3222 pmulhuw\t{%2, %0|%0, %2}
3223 pmulhuw\t{%2, %0|%0, %2}
3224 vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
3225 [(set_attr "isa" "*,sse2_noavx,avx")
3226 (set_attr "mmx_isa" "native,*,*")
3227 (set_attr "type" "mmxmul,ssemul,ssemul")
3228 (set_attr "mode" "DI,TI,TI")])
3229
3230 (define_expand "<s>mulv4hi3_highpart"
3231 [(set (match_operand:V4HI 0 "register_operand")
3232 (truncate:V4HI
3233 (lshiftrt:V4SI
3234 (mult:V4SI
3235 (any_extend:V4SI
3236 (match_operand:V4HI 1 "register_operand"))
3237 (any_extend:V4SI
3238 (match_operand:V4HI 2 "register_operand")))
3239 (const_int 16))))]
3240 "TARGET_MMX_WITH_SSE")
3241
3242 (define_insn "<s>mulv2hi3_highpart"
3243 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
3244 (truncate:V2HI
3245 (lshiftrt:V2SI
3246 (mult:V2SI
3247 (any_extend:V2SI
3248 (match_operand:V2HI 1 "register_operand" "%0,Yw"))
3249 (any_extend:V2SI
3250 (match_operand:V2HI 2 "register_operand" "x,Yw")))
3251 (const_int 16))))]
3252 "TARGET_SSE2"
3253 "@
3254 pmulh<u>w\t{%2, %0|%0, %2}
3255 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
3256 [(set_attr "isa" "noavx,avx")
3257 (set_attr "type" "ssemul")
3258 (set_attr "mode" "TI")])
3259
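;; pmaddwd multiplies the four pairs of signed 16-bit elements and adds the
;; adjacent products, yielding two 32-bit sums.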
3260 (define_expand "mmx_pmaddwd"
3261 [(set (match_operand:V2SI 0 "register_operand")
3262 (plus:V2SI
3263 (mult:V2SI
3264 (sign_extend:V2SI
3265 (vec_select:V2HI
3266 (match_operand:V4HI 1 "register_mmxmem_operand")
3267 (parallel [(const_int 0) (const_int 2)])))
3268 (sign_extend:V2SI
3269 (vec_select:V2HI
3270 (match_operand:V4HI 2 "register_mmxmem_operand")
3271 (parallel [(const_int 0) (const_int 2)]))))
3272 (mult:V2SI
3273 (sign_extend:V2SI
3274 (vec_select:V2HI (match_dup 1)
3275 (parallel [(const_int 1) (const_int 3)])))
3276 (sign_extend:V2SI
3277 (vec_select:V2HI (match_dup 2)
3278 (parallel [(const_int 1) (const_int 3)]))))))]
3279 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3280 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
3281
3282 (define_insn "*mmx_pmaddwd"
3283 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yw")
3284 (plus:V2SI
3285 (mult:V2SI
3286 (sign_extend:V2SI
3287 (vec_select:V2HI
3288 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
3289 (parallel [(const_int 0) (const_int 2)])))
3290 (sign_extend:V2SI
3291 (vec_select:V2HI
3292 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")
3293 (parallel [(const_int 0) (const_int 2)]))))
3294 (mult:V2SI
3295 (sign_extend:V2SI
3296 (vec_select:V2HI (match_dup 1)
3297 (parallel [(const_int 1) (const_int 3)])))
3298 (sign_extend:V2SI
3299 (vec_select:V2HI (match_dup 2)
3300 (parallel [(const_int 1) (const_int 3)]))))))]
3301 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3302 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
3303 "@
3304 pmaddwd\t{%2, %0|%0, %2}
3305 pmaddwd\t{%2, %0|%0, %2}
3306 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
3307 [(set_attr "isa" "*,sse2_noavx,avx")
3308 (set_attr "mmx_isa" "native,*,*")
3309 (set_attr "type" "mmxmul,sseiadd,sseiadd")
3310 (set_attr "mode" "DI,TI,TI")])
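
;; Illustrative sketch of pmaddwd (rough C, for reference only): each
;; 32-bit result lane sums two adjacent widened signed 16-bit products,
;;   dst[i] = (int) a[2*i] * b[2*i] + (int) a[2*i + 1] * b[2*i + 1];
;; e.g. a = {1,2,3,4}, b = {5,6,7,8} gives {1*5 + 2*6, 3*7 + 4*8} = {17, 53}.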
3311
3312 (define_expand "mmx_pmulhrwv4hi3"
3313 [(set (match_operand:V4HI 0 "register_operand")
3314 (truncate:V4HI
3315 (lshiftrt:V4SI
3316 (plus:V4SI
3317 (mult:V4SI
3318 (sign_extend:V4SI
3319 (match_operand:V4HI 1 "nonimmediate_operand"))
3320 (sign_extend:V4SI
3321 (match_operand:V4HI 2 "nonimmediate_operand")))
3322 (const_vector:V4SI [(const_int 32768) (const_int 32768)
3323 (const_int 32768) (const_int 32768)]))
3324 (const_int 16))))]
3325 "TARGET_3DNOW"
3326 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
3327
3328 (define_insn "*mmx_pmulhrwv4hi3"
3329 [(set (match_operand:V4HI 0 "register_operand" "=y")
3330 (truncate:V4HI
3331 (lshiftrt:V4SI
3332 (plus:V4SI
3333 (mult:V4SI
3334 (sign_extend:V4SI
3335 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
3336 (sign_extend:V4SI
3337 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
3338 (const_vector:V4SI [(const_int 32768) (const_int 32768)
3339 (const_int 32768) (const_int 32768)]))
3340 (const_int 16))))]
3341 "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)"
3342 "pmulhrw\t{%2, %0|%0, %2}"
3343 [(set_attr "type" "mmxmul")
3344 (set_attr "prefix_extra" "1")
3345 (set_attr "mode" "DI")])
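
;; Rough scalar sketch of the 3dNOW! rounding high-part multiply above
;; (illustrative only): per 16-bit lane,
;;   short mulhrw (short a, short b)
;;   { return ((int) a * (int) b + 0x8000) >> 16; }
;; e.g. 8192 * 4 = 0x8000 rounds up to 1, where plain pmulhw yields 0.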
3346
3347 (define_expand "sse2_umulv1siv1di3"
3348 [(set (match_operand:V1DI 0 "register_operand")
3349 (mult:V1DI
3350 (zero_extend:V1DI
3351 (vec_select:V1SI
3352 (match_operand:V2SI 1 "register_mmxmem_operand")
3353 (parallel [(const_int 0)])))
3354 (zero_extend:V1DI
3355 (vec_select:V1SI
3356 (match_operand:V2SI 2 "register_mmxmem_operand")
3357 (parallel [(const_int 0)])))))]
3358 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
3359 "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
3360
3361 (define_insn "*sse2_umulv1siv1di3"
3362 [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
3363 (mult:V1DI
3364 (zero_extend:V1DI
3365 (vec_select:V1SI
3366 (match_operand:V2SI 1 "register_mmxmem_operand" "%0,0,Yv")
3367 (parallel [(const_int 0)])))
3368 (zero_extend:V1DI
3369 (vec_select:V1SI
3370 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
3371 (parallel [(const_int 0)])))))]
3372 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3373 && TARGET_SSE2
3374 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
3375 "@
3376 pmuludq\t{%2, %0|%0, %2}
3377 pmuludq\t{%2, %0|%0, %2}
3378 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
3379 [(set_attr "isa" "*,sse2_noavx,avx")
3380 (set_attr "mmx_isa" "native,*,*")
3381 (set_attr "type" "mmxmul,ssemul,ssemul")
3382 (set_attr "mode" "DI,TI,TI")])
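
;; Illustrative note: pmuludq multiplies only the low 32-bit element of
;; each operand, zero-extended, into a full 64-bit product; roughly
;;   dst = (unsigned long long) a[0] * b[0];
;; e.g. 0xffffffff * 2 = 0x1fffffffe, which would not fit in 32 bits.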
3383
3384 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3385 ;;
3386 ;; Parallel integral arithmetic
3387 ;;
3388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3389
3390 (define_insn "<code><mode>3"
3391 [(set (match_operand:MMXMODE14 0 "register_operand" "=Yr,*x,Yv")
3392 (smaxmin:MMXMODE14
3393 (match_operand:MMXMODE14 1 "register_operand" "%0,0,Yv")
3394 (match_operand:MMXMODE14 2 "register_operand" "Yr,*x,Yv")))]
3395 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3396 "@
3397 p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2}
3398 p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2}
3399 vp<maxmin_int><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3400 [(set_attr "isa" "noavx,noavx,avx")
3401 (set_attr "type" "sseiadd")
3402 (set_attr "prefix_extra" "1")
3403 (set_attr "prefix" "orig,orig,vex")
3404 (set_attr "mode" "TI")])
3405
3406 (define_expand "mmx_<code>v4hi3"
3407 [(set (match_operand:V4HI 0 "register_operand")
3408 (smaxmin:V4HI
3409 (match_operand:V4HI 1 "register_mmxmem_operand")
3410 (match_operand:V4HI 2 "register_mmxmem_operand")))]
3411 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3412 && (TARGET_SSE || TARGET_3DNOW_A)"
3413 "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
3414
3415 (define_insn "*mmx_<code>v4hi3"
3416 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
3417 (smaxmin:V4HI
3418 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
3419 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
3420 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3421 && (TARGET_SSE || TARGET_3DNOW_A)
3422 && ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
3423 "@
3424 p<maxmin_int>w\t{%2, %0|%0, %2}
3425 p<maxmin_int>w\t{%2, %0|%0, %2}
3426 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
3427 [(set_attr "isa" "*,sse2_noavx,avx")
3428 (set_attr "mmx_isa" "native,*,*")
3429 (set_attr "type" "mmxadd,sseiadd,sseiadd")
3430 (set_attr "mode" "DI,TI,TI")])
3431
3432 (define_expand "<code>v4hi3"
3433 [(set (match_operand:V4HI 0 "register_operand")
3434 (smaxmin:V4HI
3435 (match_operand:V4HI 1 "register_operand")
3436 (match_operand:V4HI 2 "register_operand")))]
3437 "TARGET_MMX_WITH_SSE")
3438
3439 (define_insn "<code><mode>3"
3440 [(set (match_operand:VI1_16_32 0 "register_operand" "=Yr,*x,Yv")
3441 (smaxmin:VI1_16_32
3442 (match_operand:VI1_16_32 1 "register_operand" "%0,0,Yv")
3443 (match_operand:VI1_16_32 2 "register_operand" "Yr,*x,Yv")))]
3444 "TARGET_SSE4_1"
3445 "@
3446 p<maxmin_int>b\t{%2, %0|%0, %2}
3447 p<maxmin_int>b\t{%2, %0|%0, %2}
3448 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
3449 [(set_attr "isa" "noavx,noavx,avx")
3450 (set_attr "type" "sseiadd")
3451 (set_attr "prefix_extra" "1")
3452 (set_attr "prefix" "orig,orig,vex")
3453 (set_attr "mode" "TI")])
3454
3455 (define_insn "<code>v2hi3"
3456 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
3457 (smaxmin:V2HI
3458 (match_operand:V2HI 1 "register_operand" "%0,Yw")
3459 (match_operand:V2HI 2 "register_operand" "x,Yw")))]
3460 "TARGET_SSE2"
3461 "@
3462 p<maxmin_int>w\t{%2, %0|%0, %2}
3463 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
3464 [(set_attr "isa" "noavx,avx")
3465 (set_attr "type" "sseiadd")
3466 (set_attr "mode" "TI")])
3467
3468 (define_insn "<code><mode>3"
3469 [(set (match_operand:MMXMODE24 0 "register_operand" "=Yr,*x,Yv")
3470 (umaxmin:MMXMODE24
3471 (match_operand:MMXMODE24 1 "register_operand" "%0,0,Yv")
3472 (match_operand:MMXMODE24 2 "register_operand" "Yr,*x,Yv")))]
3473 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3474 "@
3475 p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2}
3476 p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2}
3477 vp<maxmin_int><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3478 [(set_attr "isa" "noavx,noavx,avx")
3479 (set_attr "type" "sseiadd")
3480 (set_attr "prefix_extra" "1")
3481 (set_attr "prefix" "orig,orig,vex")
3482 (set_attr "mode" "TI")])
3483
3484 (define_expand "mmx_<code>v8qi3"
3485 [(set (match_operand:V8QI 0 "register_operand")
3486 (umaxmin:V8QI
3487 (match_operand:V8QI 1 "register_mmxmem_operand")
3488 (match_operand:V8QI 2 "register_mmxmem_operand")))]
3489 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3490 && (TARGET_SSE || TARGET_3DNOW_A)"
3491 "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
3492
3493 (define_insn "*mmx_<code>v8qi3"
3494 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
3495 (umaxmin:V8QI
3496 (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")
3497 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))]
3498 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3499 && (TARGET_SSE || TARGET_3DNOW_A)
3500 && ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
3501 "@
3502 p<maxmin_int>b\t{%2, %0|%0, %2}
3503 p<maxmin_int>b\t{%2, %0|%0, %2}
3504 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
3505 [(set_attr "isa" "*,sse2_noavx,avx")
3506 (set_attr "mmx_isa" "native,*,*")
3507 (set_attr "type" "mmxadd,sseiadd,sseiadd")
3508 (set_attr "mode" "DI,TI,TI")])
3509
3510 (define_expand "<code>v8qi3"
3511 [(set (match_operand:V8QI 0 "register_operand")
3512 (umaxmin:V8QI
3513 (match_operand:V8QI 1 "register_operand")
3514 (match_operand:V8QI 2 "register_operand")))]
3515 "TARGET_MMX_WITH_SSE")
3516
3517 (define_insn "<code><mode>3"
3518 [(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw")
3519 (umaxmin:VI1_16_32
3520 (match_operand:VI1_16_32 1 "register_operand" "%0,Yw")
3521 (match_operand:VI1_16_32 2 "register_operand" "x,Yw")))]
3522 "TARGET_SSE2"
3523 "@
3524 p<maxmin_int>b\t{%2, %0|%0, %2}
3525 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
3526 [(set_attr "isa" "noavx,avx")
3527 (set_attr "type" "sseiadd")
3528 (set_attr "mode" "TI")])
3529
3530 (define_insn "<code>v2hi3"
3531 [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yv")
3532 (umaxmin:V2HI
3533 (match_operand:V2HI 1 "register_operand" "%0,0,Yv")
3534 (match_operand:V2HI 2 "register_operand" "Yr,*x,Yv")))]
3535 "TARGET_SSE4_1"
3536 "@
3537 p<maxmin_int>w\t{%2, %0|%0, %2}
3538 p<maxmin_int>w\t{%2, %0|%0, %2}
3539 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
3540 [(set_attr "isa" "noavx,noavx,avx")
3541 (set_attr "type" "sseiadd")
3542 (set_attr "prefix_extra" "1")
3543 (set_attr "prefix" "orig,orig,vex")
3544 (set_attr "mode" "TI")])
3545
3546 (define_insn "ssse3_abs<mode>2"
3547 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
3548 (abs:MMXMODEI
3549 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
3550 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
3551 "@
3552 pabs<mmxvecsize>\t{%1, %0|%0, %1}
3553 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
3554 [(set_attr "mmx_isa" "native,*")
3555 (set_attr "type" "sselog1")
3556 (set_attr "prefix_rep" "0")
3557 (set_attr "prefix_extra" "1")
3558 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
3559 (set_attr "mode" "DI,TI")])
3560
3561 (define_expand "abs<mode>2"
3562 [(set (match_operand:MMXMODEI 0 "register_operand")
3563 (abs:MMXMODEI
3564 (match_operand:MMXMODEI 1 "register_operand")))]
3565 "TARGET_SSSE3 && TARGET_MMX_WITH_SSE")
3566
3567 (define_insn "abs<mode>2"
3568 [(set (match_operand:VI_16_32 0 "register_operand" "=Yv")
3569 (abs:VI_16_32
3570 (match_operand:VI_16_32 1 "register_operand" "Yv")))]
3571 "TARGET_SSSE3"
3572 "%vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
3573 [(set_attr "type" "sselog1")
3574 (set_attr "prefix_rep" "0")
3575 (set_attr "prefix_extra" "1")
3576 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
3577 (set_attr "mode" "TI")])
3578
3579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3580 ;;
3581 ;; Parallel integral shifts
3582 ;;
3583 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3584
3585 (define_insn "mmx_ashr<mode>3"
3586 [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,<Yv_Yw>")
3587 (ashiftrt:MMXMODE24
3588 (match_operand:MMXMODE24 1 "register_operand" "0,0,<Yv_Yw>")
3589 (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
3590 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3591 "@
3592 psra<mmxvecsize>\t{%2, %0|%0, %2}
3593 psra<mmxvecsize>\t{%2, %0|%0, %2}
3594 vpsra<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3595 [(set_attr "isa" "*,sse2_noavx,avx")
3596 (set_attr "mmx_isa" "native,*,*")
3597 (set_attr "type" "mmxshft,sseishft,sseishft")
3598 (set (attr "length_immediate")
3599 (if_then_else (match_operand 2 "const_int_operand")
3600 (const_string "1")
3601 (const_string "0")))
3602 (set_attr "mode" "DI,TI,TI")])
3603
3604 (define_expand "ashr<mode>3"
3605 [(set (match_operand:MMXMODE24 0 "register_operand")
3606 (ashiftrt:MMXMODE24
3607 (match_operand:MMXMODE24 1 "register_operand")
3608 (match_operand:DI 2 "nonmemory_operand")))]
3609 "TARGET_MMX_WITH_SSE")
3610
3611 (define_insn "mmx_<insn><mode>3"
3612 [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,<Yv_Yw>")
3613 (any_lshift:MMXMODE248
3614 (match_operand:MMXMODE248 1 "register_operand" "0,0,<Yv_Yw>")
3615 (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
3616 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3617 "@
3618 p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
3619 p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
3620 vp<vshift><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3621 [(set_attr "isa" "*,sse2_noavx,avx")
3622 (set_attr "mmx_isa" "native,*,*")
3623 (set_attr "type" "mmxshft,sseishft,sseishft")
3624 (set (attr "length_immediate")
3625 (if_then_else (match_operand 2 "const_int_operand")
3626 (const_string "1")
3627 (const_string "0")))
3628 (set_attr "mode" "DI,TI,TI")])
3629
3630 (define_expand "<insn><mode>3"
3631 [(set (match_operand:MMXMODE24 0 "register_operand")
3632 (any_lshift:MMXMODE24
3633 (match_operand:MMXMODE24 1 "register_operand")
3634 (match_operand:DI 2 "nonmemory_operand")))]
3635 "TARGET_MMX_WITH_SSE")
3636
3637 (define_insn "mmx_<insn>v1si3"
3638 [(set (match_operand:V1SI 0 "register_operand" "=x,Yw")
3639 (any_lshift:V1SI
3640 (match_operand:V1SI 1 "register_operand" "0,Yw")
3641 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
3642 "TARGET_SSE2"
3643 "@
3644 p<vshift>d\t{%2, %0|%0, %2}
3645 vp<vshift>d\t{%2, %1, %0|%0, %1, %2}"
3646 [(set_attr "isa" "noavx,avx")
3647 (set_attr "type" "sseishft")
3648 (set (attr "length_immediate")
3649 (if_then_else (match_operand 2 "const_int_operand")
3650 (const_string "1")
3651 (const_string "0")))
3652 (set_attr "mode" "TI")])
3653
3654 (define_insn "<insn>v2hi3"
3655 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
3656 (any_shift:V2HI
3657 (match_operand:V2HI 1 "register_operand" "0,Yw")
3658 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
3659 "TARGET_SSE2"
3660 "@
3661 p<vshift>w\t{%2, %0|%0, %2}
3662 vp<vshift>w\t{%2, %1, %0|%0, %1, %2}"
3663 [(set_attr "isa" "noavx,avx")
3664 (set_attr "type" "sseishft")
3665 (set (attr "length_immediate")
3666 (if_then_else (match_operand 2 "const_int_operand")
3667 (const_string "1")
3668 (const_string "0")))
3669 (set_attr "mode" "TI")])
3670
3671 (define_expand "<insn>v8qi3"
3672 [(set (match_operand:V8QI 0 "register_operand")
3673 (any_shift:V8QI (match_operand:V8QI 1 "register_operand")
3674 (match_operand:DI 2 "nonmemory_operand")))]
3675 "TARGET_MMX_WITH_SSE"
3676 {
3677 ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
3678 operands[1], operands[2]);
3679 DONE;
3680 })
3681
3682 (define_expand "<insn>v4qi3"
3683 [(set (match_operand:V4QI 0 "register_operand")
3684 (any_shift:V4QI (match_operand:V4QI 1 "register_operand")
3685 (match_operand:DI 2 "nonmemory_operand")))]
3686 "TARGET_SSE2"
3687 {
3688 ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
3689 operands[1], operands[2]);
3690 DONE;
3691 })
3692
3693 (define_insn_and_split "<insn>v2qi3"
3694 [(set (match_operand:V2QI 0 "register_operand" "=Q")
3695 (any_shift:V2QI
3696 (match_operand:V2QI 1 "register_operand" "0")
3697 (match_operand:QI 2 "nonmemory_operand" "cI")))
3698 (clobber (reg:CC FLAGS_REG))]
3699 "!TARGET_PARTIAL_REG_STALL || optimize_size"
3700 "#"
3701 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
3702 && reload_completed"
3703 [(parallel
3704 [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
3705 (subreg:HI
3706 (any_shift:QI
3707 (subreg:QI
3708 (zero_extract:HI (match_dup 4)
3709 (const_int 8)
3710 (const_int 8)) 0)
3711 (match_dup 2)) 0))
3712 (clobber (reg:CC FLAGS_REG))])
3713 (parallel
3714 [(set (strict_low_part (match_dup 0))
3715 (any_shift:QI (match_dup 1) (match_dup 2)))
3716 (clobber (reg:CC FLAGS_REG))])]
3717 {
3718 operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
3719 operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
3720 operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
3721 operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
3722 }
3723 [(set_attr "type" "multi")
3724 (set_attr "mode" "QI")])
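
;; The split above handles the two bytes of a V2QI shift independently;
;; as a rough sketch (field names purely illustrative):
;;   dst.b[1] = shift (src.b[1], count);  /* high byte, via zero_extract */
;;   dst.b[0] = shift (src.b[0], count);  /* low byte, via strict_low_part */
;; so no vector instruction is needed for V2QImode shifts.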
3725
3726 (define_expand "v<insn>v8qi3"
3727 [(set (match_operand:V8QI 0 "register_operand")
3728 (any_shift:V8QI
3729 (match_operand:V8QI 1 "register_operand")
3730 (match_operand:V8QI 2 "register_operand")))]
3731 "TARGET_AVX512BW && TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
3732 {
3733 ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
3734 operands[1], operands[2]);
3735 DONE;
3736 })
3737
3738 (define_expand "v<insn>v4qi3"
3739 [(set (match_operand:V4QI 0 "register_operand")
3740 (any_shift:V4QI
3741 (match_operand:V4QI 1 "register_operand")
3742 (match_operand:V4QI 2 "register_operand")))]
3743 "TARGET_AVX512BW && TARGET_AVX512VL"
3744 {
3745 ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
3746 operands[1], operands[2]);
3747 DONE;
3748 })
3749
3750 (define_expand "vec_shl_<mode>"
3751 [(set (match_operand:V248FI 0 "register_operand")
3752 (ashift:V1DI
3753 (match_operand:V248FI 1 "nonimmediate_operand")
3754 (match_operand:DI 2 "nonmemory_operand")))]
3755 "TARGET_MMX_WITH_SSE"
3756 {
3757 rtx op0 = gen_reg_rtx (V1DImode);
3758 rtx op1 = force_reg (<MODE>mode, operands[1]);
3759
3760 emit_insn (gen_mmx_ashlv1di3
3761 (op0, gen_lowpart (V1DImode, op1), operands[2]));
3762 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
3763 DONE;
3764 })
3765
3766 (define_expand "vec_shl_<mode>"
3767 [(set (match_operand:V24FI_32 0 "register_operand")
3768 (ashift:V1SI
3769 (match_operand:V24FI_32 1 "nonimmediate_operand")
3770 (match_operand:DI 2 "nonmemory_operand")))]
3771 "TARGET_SSE2"
3772 {
3773 rtx op0 = gen_reg_rtx (V1SImode);
3774 rtx op1 = force_reg (<MODE>mode, operands[1]);
3775
3776 emit_insn (gen_mmx_ashlv1si3
3777 (op0, gen_lowpart (V1SImode, op1), operands[2]));
3778 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
3779 DONE;
3780 })
3781
3782 (define_expand "vec_shr_<mode>"
3783 [(set (match_operand:V248FI 0 "register_operand")
3784 (lshiftrt:V1DI
3785 (match_operand:V248FI 1 "nonimmediate_operand")
3786 (match_operand:DI 2 "nonmemory_operand")))]
3787 "TARGET_MMX_WITH_SSE"
3788 {
3789 rtx op0 = gen_reg_rtx (V1DImode);
3790 rtx op1 = force_reg (<MODE>mode, operands[1]);
3791
3792 emit_insn (gen_mmx_lshrv1di3
3793 (op0, gen_lowpart (V1DImode, op1), operands[2]));
3794 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
3795 DONE;
3796 })
3797
3798 (define_expand "vec_shr_<mode>"
3799 [(set (match_operand:V24FI_32 0 "register_operand")
3800 (lshiftrt:V1SI
3801 (match_operand:V24FI_32 1 "nonimmediate_operand")
3802 (match_operand:DI 2 "nonmemory_operand")))]
3803 "TARGET_SSE2"
3804 {
3805 rtx op0 = gen_reg_rtx (V1SImode);
3806 rtx op1 = force_reg (<MODE>mode, operands[1]);
3807
3808 emit_insn (gen_mmx_lshrv1si3
3809 (op0, gen_lowpart (V1SImode, op1), operands[2]));
3810 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
3811 DONE;
3812 })
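
;; Illustrative note: the vec_shl/vec_shr expanders above treat the whole
;; vector as a single V1DI (or V1SI) value, so the shift count is in bits
;; and crosses element boundaries; e.g. on the usual little-endian lane
;; layout, shifting a V4HI {1,2,3,4} left by 16 bits gives {0,1,2,3}.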
3813
3814 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3815 ;;
3816 ;; Parallel integral comparisons
3817 ;;
3818 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3819
3820 (define_expand "mmx_eq<mode>3"
3821 [(set (match_operand:MMXMODEI 0 "register_operand")
3822 (eq:MMXMODEI
3823 (match_operand:MMXMODEI 1 "register_mmxmem_operand")
3824 (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
3825 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3826 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
3827
3828 (define_insn "*mmx_eq<mode>3"
3829 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
3830 (eq:MMXMODEI
3831 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x")
3832 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
3833 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3834 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3835 "@
3836 pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
3837 pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
3838 vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3839 [(set_attr "isa" "*,sse2_noavx,avx")
3840 (set_attr "mmx_isa" "native,*,*")
3841 (set_attr "type" "mmxcmp,ssecmp,ssecmp")
3842 (set_attr "mode" "DI,TI,TI")])
3843
3844 (define_insn "*eq<mode>3"
3845 [(set (match_operand:VI_16_32 0 "register_operand" "=x,x")
3846 (eq:VI_16_32
3847 (match_operand:VI_16_32 1 "register_operand" "%0,x")
3848 (match_operand:VI_16_32 2 "register_operand" "x,x")))]
3849 "TARGET_SSE2"
3850 "@
3851 pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
3852 vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3853 [(set_attr "isa" "noavx,avx")
3854 (set_attr "type" "ssecmp")
3855 (set_attr "mode" "TI")])
3856
3857 (define_insn "mmx_gt<mode>3"
3858 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
3859 (gt:MMXMODEI
3860 (match_operand:MMXMODEI 1 "register_operand" "0,0,x")
3861 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
3862 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3863 "@
3864 pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
3865 pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
3866 vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3867 [(set_attr "isa" "*,sse2_noavx,avx")
3868 (set_attr "mmx_isa" "native,*,*")
3869 (set_attr "type" "mmxcmp,ssecmp,ssecmp")
3870 (set_attr "mode" "DI,TI,TI")])
3871
3872 (define_insn "*gt<mode>3"
3873 [(set (match_operand:VI_16_32 0 "register_operand" "=x,x")
3874 (gt:VI_16_32
3875 (match_operand:VI_16_32 1 "register_operand" "0,x")
3876 (match_operand:VI_16_32 2 "register_operand" "x,x")))]
3877 "TARGET_SSE2"
3878 "@
3879 pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
3880 vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
3881 [(set_attr "isa" "noavx,avx")
3882 (set_attr "type" "ssecmp")
3883 (set_attr "mode" "TI")])
3884
3885 (define_insn "*xop_maskcmp<mode>3"
3886 [(set (match_operand:MMXMODEI 0 "register_operand" "=x")
3887 (match_operator:MMXMODEI 1 "ix86_comparison_int_operator"
3888 [(match_operand:MMXMODEI 2 "register_operand" "x")
3889 (match_operand:MMXMODEI 3 "register_operand" "x")]))]
3890 "TARGET_XOP"
3891 "vpcom%Y1<mmxvecsize>\t{%3, %2, %0|%0, %2, %3}"
3892 [(set_attr "type" "sse4arg")
3893 (set_attr "mode" "TI")])
3894
3895 (define_insn "*xop_maskcmp<mode>3"
3896 [(set (match_operand:VI_16_32 0 "register_operand" "=x")
3897 (match_operator:VI_16_32 1 "ix86_comparison_int_operator"
3898 [(match_operand:VI_16_32 2 "register_operand" "x")
3899 (match_operand:VI_16_32 3 "register_operand" "x")]))]
3900 "TARGET_XOP"
3901 "vpcom%Y1<mmxvecsize>\t{%3, %2, %0|%0, %2, %3}"
3902 [(set_attr "type" "sse4arg")
3903 (set_attr "mode" "TI")])
3904
3905 (define_insn "*xop_maskcmp_uns<mode>3"
3906 [(set (match_operand:MMXMODEI 0 "register_operand" "=x")
3907 (match_operator:MMXMODEI 1 "ix86_comparison_uns_operator"
3908 [(match_operand:MMXMODEI 2 "register_operand" "x")
3909 (match_operand:MMXMODEI 3 "register_operand" "x")]))]
3910 "TARGET_XOP"
3911 "vpcom%Y1u<mmxvecsize>\t{%3, %2, %0|%0, %2, %3}"
3912 [(set_attr "type" "sse4arg")
3913 (set_attr "mode" "TI")])
3914
3915 (define_insn "*xop_maskcmp_uns<mode>3"
3916 [(set (match_operand:VI_16_32 0 "register_operand" "=x")
3917 (match_operator:VI_16_32 1 "ix86_comparison_uns_operator"
3918 [(match_operand:VI_16_32 2 "register_operand" "x")
3919 (match_operand:VI_16_32 3 "register_operand" "x")]))]
3920 "TARGET_XOP"
3921 "vpcom%Y1u<mmxvecsize>\t{%3, %2, %0|%0, %2, %3}"
3922 [(set_attr "type" "sse4arg")
3923 (set_attr "mode" "TI")])
3924
3925 (define_expand "vec_cmp<mode><mode>"
3926 [(set (match_operand:MMXMODEI 0 "register_operand")
3927 (match_operator:MMXMODEI 1 ""
3928 [(match_operand:MMXMODEI 2 "register_operand")
3929 (match_operand:MMXMODEI 3 "register_operand")]))]
3930 "TARGET_MMX_WITH_SSE"
3931 {
3932 bool ok = ix86_expand_int_vec_cmp (operands);
3933 gcc_assert (ok);
3934 DONE;
3935 })
3936
3937 (define_expand "vec_cmp<mode><mode>"
3938 [(set (match_operand:VI_16_32 0 "register_operand")
3939 (match_operator:VI_16_32 1 ""
3940 [(match_operand:VI_16_32 2 "register_operand")
3941 (match_operand:VI_16_32 3 "register_operand")]))]
3942 "TARGET_SSE2"
3943 {
3944 bool ok = ix86_expand_int_vec_cmp (operands);
3945 gcc_assert (ok);
3946 DONE;
3947 })
3948
3949 (define_expand "vec_cmpu<mode><mode>"
3950 [(set (match_operand:MMXMODEI 0 "register_operand")
3951 (match_operator:MMXMODEI 1 ""
3952 [(match_operand:MMXMODEI 2 "register_operand")
3953 (match_operand:MMXMODEI 3 "register_operand")]))]
3954 "TARGET_MMX_WITH_SSE"
3955 {
3956 bool ok = ix86_expand_int_vec_cmp (operands);
3957 gcc_assert (ok);
3958 DONE;
3959 })
3960
3961 (define_expand "vec_cmpu<mode><mode>"
3962 [(set (match_operand:VI_16_32 0 "register_operand")
3963 (match_operator:VI_16_32 1 ""
3964 [(match_operand:VI_16_32 2 "register_operand")
3965 (match_operand:VI_16_32 3 "register_operand")]))]
3966 "TARGET_SSE2"
3967 {
3968 bool ok = ix86_expand_int_vec_cmp (operands);
3969 gcc_assert (ok);
3970 DONE;
3971 })
3972
3973 (define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
3974 [(set (match_operand:MMXMODE124 0 "register_operand")
3975 (if_then_else:MMXMODE124
3976 (match_operator 3 ""
3977 [(match_operand:MMXMODEI 4 "register_operand")
3978 (match_operand:MMXMODEI 5 "register_operand")])
3979 (match_operand:MMXMODE124 1)
3980 (match_operand:MMXMODE124 2)))]
3981 "TARGET_MMX_WITH_SSE
3982 && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
3983 == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
3984 {
3985 bool ok = ix86_expand_int_vcond (operands);
3986 gcc_assert (ok);
3987 DONE;
3988 })
3989
3990 (define_expand "vcond<mode><mode>"
3991 [(set (match_operand:VI_16_32 0 "register_operand")
3992 (if_then_else:VI_16_32
3993 (match_operator 3 ""
3994 [(match_operand:VI_16_32 4 "register_operand")
3995 (match_operand:VI_16_32 5 "register_operand")])
3996 (match_operand:VI_16_32 1)
3997 (match_operand:VI_16_32 2)))]
3998 "TARGET_SSE2"
3999 {
4000 bool ok = ix86_expand_int_vcond (operands);
4001 gcc_assert (ok);
4002 DONE;
4003 })
4004
4005 (define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
4006 [(set (match_operand:MMXMODE124 0 "register_operand")
4007 (if_then_else:MMXMODE124
4008 (match_operator 3 ""
4009 [(match_operand:MMXMODEI 4 "register_operand")
4010 (match_operand:MMXMODEI 5 "register_operand")])
4011 (match_operand:MMXMODE124 1)
4012 (match_operand:MMXMODE124 2)))]
4013 "TARGET_MMX_WITH_SSE
4014 && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
4015 == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
4016 {
4017 bool ok = ix86_expand_int_vcond (operands);
4018 gcc_assert (ok);
4019 DONE;
4020 })
4021
4022 (define_expand "vcondu<mode><mode>"
4023 [(set (match_operand:VI_16_32 0 "register_operand")
4024 (if_then_else:VI_16_32
4025 (match_operator 3 ""
4026 [(match_operand:VI_16_32 4 "register_operand")
4027 (match_operand:VI_16_32 5 "register_operand")])
4028 (match_operand:VI_16_32 1)
4029 (match_operand:VI_16_32 2)))]
4030 "TARGET_SSE2"
4031 {
4032 bool ok = ix86_expand_int_vcond (operands);
4033 gcc_assert (ok);
4034 DONE;
4035 })
4036
4037 (define_expand "vcond_mask_<mode><mmxintvecmodelower>"
4038 [(set (match_operand:MMXMODE124 0 "register_operand")
4039 (vec_merge:MMXMODE124
4040 (match_operand:MMXMODE124 1 "register_operand")
4041 (match_operand:MMXMODE124 2 "register_operand")
4042 (match_operand:<mmxintvecmode> 3 "register_operand")))]
4043 "TARGET_MMX_WITH_SSE"
4044 {
4045 ix86_expand_sse_movcc (operands[0], operands[3],
4046 operands[1], operands[2]);
4047 DONE;
4048 })
4049
4050 (define_expand "vcond_mask_<mode><mode>"
4051 [(set (match_operand:VI_16_32 0 "register_operand")
4052 (vec_merge:VI_16_32
4053 (match_operand:VI_16_32 1 "register_operand")
4054 (match_operand:VI_16_32 2 "register_operand")
4055 (match_operand:VI_16_32 3 "register_operand")))]
4056 "TARGET_SSE2"
4057 {
4058 ix86_expand_sse_movcc (operands[0], operands[3],
4059 operands[1], operands[2]);
4060 DONE;
4061 })
4062
4063 (define_insn "mmx_pblendvb_v8qi"
4064 [(set (match_operand:V8QI 0 "register_operand" "=Yr,*x,x")
4065 (unspec:V8QI
4066 [(match_operand:V8QI 1 "register_operand" "0,0,x")
4067 (match_operand:V8QI 2 "register_operand" "Yr,*x,x")
4068 (match_operand:V8QI 3 "register_operand" "Yz,Yz,x")]
4069 UNSPEC_BLENDV))]
4070 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4071 "@
4072 pblendvb\t{%3, %2, %0|%0, %2, %3}
4073 pblendvb\t{%3, %2, %0|%0, %2, %3}
4074 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4075 [(set_attr "isa" "noavx,noavx,avx")
4076 (set_attr "type" "ssemov")
4077 (set_attr "prefix_extra" "1")
4078 (set_attr "length_immediate" "1")
4079 (set_attr "prefix" "orig,orig,vex")
4080 (set_attr "btver2_decode" "vector")
4081 (set_attr "mode" "TI")])
4082
4083 (define_insn_and_split "*mmx_pblendvb_v8qi_1"
4084 [(set (match_operand:V8QI 0 "register_operand")
4085 (unspec:V8QI
4086 [(match_operand:V8QI 1 "register_operand")
4087 (match_operand:V8QI 2 "register_operand")
4088 (eq:V8QI
4089 (eq:V8QI
4090 (match_operand:V8QI 3 "register_operand")
4091 (match_operand:V8QI 4 "nonmemory_operand"))
4092 (match_operand:V8QI 5 "const0_operand"))]
4093 UNSPEC_BLENDV))]
4094 "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
4095 "#"
4096 "&& 1"
4097 [(set (match_dup 6)
4098 (eq:V8QI (match_dup 3) (match_dup 7)))
4099 (set (match_dup 0)
4100 (unspec:V8QI
4101 [(match_dup 2)
4102 (match_dup 1)
4103 (match_dup 6)]
4104 UNSPEC_BLENDV))]
4105 {
4106 operands[6] = gen_reg_rtx (V8QImode);
4107 operands[7] = force_reg (V8QImode, operands[4]);
4108 })
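
;; Illustrative note on the split above: the matched mask
;; (eq (eq a b) 0) is simply "a != b", so instead of materializing the
;; negated mask the split computes the plain (eq a b) mask and swaps the
;; two blend inputs, i.e. blend (x, y, !m) == blend (y, x, m).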
4109
4110 (define_insn_and_split "*mmx_pblendvb_v8qi_2"
4111 [(set (match_operand:V8QI 0 "register_operand")
4112 (unspec:V8QI
4113 [(match_operand:V8QI 1 "register_operand")
4114 (match_operand:V8QI 2 "register_operand")
4115 (subreg:V8QI
4116 (eq:MMXMODE24
4117 (eq:MMXMODE24
4118 (match_operand:MMXMODE24 3 "register_operand")
4119 (match_operand:MMXMODE24 4 "nonmemory_operand"))
4120 (match_operand:MMXMODE24 5 "const0_operand")) 0)]
4121 UNSPEC_BLENDV))]
4122 "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
4123 "#"
4124 "&& 1"
4125 [(set (match_dup 6)
4126 (eq:MMXMODE24 (match_dup 3) (match_dup 8)))
4127 (set (match_dup 0)
4128 (unspec:V8QI
4129 [(match_dup 2)
4130 (match_dup 1)
4131 (match_dup 7)]
4132 UNSPEC_BLENDV))]
4133 {
4134 operands[6] = gen_reg_rtx (<MODE>mode);
4135 operands[7] = lowpart_subreg (V8QImode, operands[6], <MODE>mode);
4136 operands[8] = force_reg (<MODE>mode, operands[4]);
4137 })
4138
4139 (define_insn "mmx_pblendvb_<mode>"
4140 [(set (match_operand:VI_16_32 0 "register_operand" "=Yr,*x,x")
4141 (unspec:VI_16_32
4142 [(match_operand:VI_16_32 1 "register_operand" "0,0,x")
4143 (match_operand:VI_16_32 2 "register_operand" "Yr,*x,x")
4144 (match_operand:VI_16_32 3 "register_operand" "Yz,Yz,x")]
4145 UNSPEC_BLENDV))]
4146 "TARGET_SSE4_1"
4147 "@
4148 pblendvb\t{%3, %2, %0|%0, %2, %3}
4149 pblendvb\t{%3, %2, %0|%0, %2, %3}
4150 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4151 [(set_attr "isa" "noavx,noavx,avx")
4152 (set_attr "type" "ssemov")
4153 (set_attr "prefix_extra" "1")
4154 (set_attr "length_immediate" "1")
4155 (set_attr "prefix" "orig,orig,vex")
4156 (set_attr "btver2_decode" "vector")
4157 (set_attr "mode" "TI")])
4158
4159 (define_insn_and_split "*mmx_pblendvb_<mode>_1"
4160 [(set (match_operand:VI_16_32 0 "register_operand")
4161 (unspec:VI_16_32
4162 [(match_operand:VI_16_32 1 "register_operand")
4163 (match_operand:VI_16_32 2 "register_operand")
4164 (eq:VI_16_32
4165 (eq:VI_16_32
4166 (match_operand:VI_16_32 3 "register_operand")
4167 (match_operand:VI_16_32 4 "nonmemory_operand"))
4168 (match_operand:VI_16_32 5 "const0_operand"))]
4169 UNSPEC_BLENDV))]
4170 "TARGET_SSE2 && ix86_pre_reload_split ()"
4171 "#"
4172 "&& 1"
4173 [(set (match_dup 6)
4174 (eq:VI_16_32 (match_dup 3) (match_dup 7)))
4175 (set (match_dup 0)
4176 (unspec:VI_16_32
4177 [(match_dup 2)
4178 (match_dup 1)
4179 (match_dup 6)]
4180 UNSPEC_BLENDV))]
4181 {
4182 operands[6] = gen_reg_rtx (<MODE>mode);
4183 operands[7] = force_reg (<MODE>mode, operands[4]);
4184 })
4185
4186 (define_insn_and_split "*mmx_pblendvb_v4qi_2"
4187 [(set (match_operand:V4QI 0 "register_operand")
4188 (unspec:V4QI
4189 [(match_operand:V4QI 1 "register_operand")
4190 (match_operand:V4QI 2 "register_operand")
4191 (subreg:V4QI
4192 (eq:V2HI
4193 (eq:V2HI
4194 (match_operand:V2HI 3 "register_operand")
4195 (match_operand:V2HI 4 "nonmemory_operand"))
4196 (match_operand:V2HI 5 "const0_operand")) 0)]
4197 UNSPEC_BLENDV))]
4198 "TARGET_SSE2 && ix86_pre_reload_split ()"
4199 "#"
4200 "&& 1"
4201 [(set (match_dup 6)
4202 (eq:V2HI (match_dup 3) (match_dup 8)))
4203 (set (match_dup 0)
4204 (unspec:V4QI
4205 [(match_dup 2)
4206 (match_dup 1)
4207 (match_dup 7)]
4208 UNSPEC_BLENDV))]
4209 {
4210 operands[6] = gen_reg_rtx (V2HImode);
4211 operands[7] = lowpart_subreg (V4QImode, operands[6], V2HImode);
4212 operands[8] = force_reg (V2HImode, operands[4]);
4213 })
4214
4215 ;; XOP parallel XMM conditional moves
4216 (define_insn "*xop_pcmov_<mode>"
4217 [(set (match_operand:MMXMODE124 0 "register_operand" "=x")
4218 (if_then_else:MMXMODE124
4219 (match_operand:MMXMODE124 3 "register_operand" "x")
4220 (match_operand:MMXMODE124 1 "register_operand" "x")
4221 (match_operand:MMXMODE124 2 "register_operand" "x")))]
4222 "TARGET_XOP && TARGET_MMX_WITH_SSE"
4223 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4224 [(set_attr "type" "sse4arg")
4225 (set_attr "mode" "TI")])
4226
4227 (define_insn "*xop_pcmov_<mode>"
4228 [(set (match_operand:V4F_64 0 "register_operand" "=x")
4229 (if_then_else:V4F_64
4230 (match_operand:V4F_64 3 "register_operand" "x")
4231 (match_operand:V4F_64 1 "register_operand" "x")
4232 (match_operand:V4F_64 2 "register_operand" "x")))]
4233 "TARGET_XOP && TARGET_MMX_WITH_SSE"
4234 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4235 [(set_attr "type" "sse4arg")
4236 (set_attr "mode" "TI")])
4237
4238 (define_insn "*xop_pcmov_<mode>"
4239 [(set (match_operand:VI_16_32 0 "register_operand" "=x")
4240 (if_then_else:VI_16_32
4241 (match_operand:VI_16_32 3 "register_operand" "x")
4242 (match_operand:VI_16_32 1 "register_operand" "x")
4243 (match_operand:VI_16_32 2 "register_operand" "x")))]
4244 "TARGET_XOP"
4245 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4246 [(set_attr "type" "sse4arg")
4247 (set_attr "mode" "TI")])
4248
4249 (define_insn "*xop_pcmov_<mode>"
4250 [(set (match_operand:V2F_32 0 "register_operand" "=x")
4251 (if_then_else:V2F_32
4252 (match_operand:V2F_32 3 "register_operand" "x")
4253 (match_operand:V2F_32 1 "register_operand" "x")
4254 (match_operand:V2F_32 2 "register_operand" "x")))]
4255 "TARGET_XOP"
4256 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4257 [(set_attr "type" "sse4arg")
4258 (set_attr "mode" "TI")])
4259
4260 ;; XOP permute instructions
4261 (define_insn "mmx_ppermv64"
4262 [(set (match_operand:V8QI 0 "register_operand" "=x")
4263 (unspec:V8QI
4264 [(match_operand:V8QI 1 "register_operand" "x")
4265 (match_operand:V8QI 2 "register_operand" "x")
4266 (match_operand:V16QI 3 "nonimmediate_operand" "xm")]
4267 UNSPEC_XOP_PERMUTE))]
4268 "TARGET_XOP && TARGET_MMX_WITH_SSE"
4269 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4270 [(set_attr "type" "sse4arg")
4271 (set_attr "mode" "TI")])
4272
4273 (define_insn "mmx_ppermv32"
4274 [(set (match_operand:V4QI 0 "register_operand" "=x")
4275 (unspec:V4QI
4276 [(match_operand:V4QI 1 "register_operand" "x")
4277 (match_operand:V4QI 2 "register_operand" "x")
4278 (match_operand:V16QI 3 "nonimmediate_operand" "xm")]
4279 UNSPEC_XOP_PERMUTE))]
4280 "TARGET_XOP"
4281 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4282 [(set_attr "type" "sse4arg")
4283 (set_attr "mode" "TI")])
4284
4285 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4286 ;;
4287 ;; Parallel integral logical operations
4288 ;;
4289 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4290
4291 (define_expand "one_cmpl<mode>2"
4292 [(set (match_operand:MMXMODEI 0 "register_operand")
4293 (xor:MMXMODEI
4294 (match_operand:MMXMODEI 1 "register_operand")
4295 (match_dup 2)))]
4296 "TARGET_MMX_WITH_SSE"
4297 "operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
4298
4299 (define_insn "one_cmpl<mode>2"
4300 [(set (match_operand:VI_16_32 0 "register_operand" "=?r,&x,&v")
4301 (not:VI_16_32
4302 (match_operand:VI_16_32 1 "register_operand" "0,x,v")))]
4303 ""
4304 "#"
4305 [(set_attr "isa" "*,sse2,avx512vl")
4306 (set_attr "type" "negnot,sselog1,sselog1")
4307 (set_attr "mode" "SI,TI,TI")])
4308
4309 (define_split
4310 [(set (match_operand:VI_16_32 0 "general_reg_operand")
4311 (not:VI_16_32
4312 (match_operand:VI_16_32 1 "general_reg_operand")))]
4313 "reload_completed"
4314 [(set (match_dup 0)
4315 (not:SI (match_dup 1)))]
4316 {
4317 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
4318 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
4319 })
4320
4321 (define_split
4322 [(set (match_operand:VI_16_32 0 "sse_reg_operand")
4323 (not:VI_16_32
4324 (match_operand:VI_16_32 1 "sse_reg_operand")))]
4325 "TARGET_SSE2 && reload_completed"
4326 [(set (match_dup 0) (match_dup 2))
4327 (set (match_dup 0)
4328 (xor:V16QI
4329 (match_dup 0) (match_dup 1)))]
4330 {
4331 operands[2] = CONSTM1_RTX (V16QImode);
4332 operands[1] = lowpart_subreg (V16QImode, operands[1], <MODE>mode);
4333 operands[0] = lowpart_subreg (V16QImode, operands[0], <MODE>mode);
4334 })
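
;; Illustrative note: there is no vector NOT instruction, so for SSE
;; registers the split above loads an all-ones constant and computes
;; dst = src ^ ~0 in V16QImode, while for general registers the
;; preceding split simply uses a scalar SImode NOT on the subword.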
4335
4336 (define_insn "mmx_andnot<mode>3"
4337 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
4338 (and:MMXMODEI
4339 (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,x,v"))
4340 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
4341 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4342 "@
4343 pandn\t{%2, %0|%0, %2}
4344 pandn\t{%2, %0|%0, %2}
4345 vpandn\t{%2, %1, %0|%0, %1, %2}
4346 vpandnd\t{%2, %1, %0|%0, %1, %2}"
4347 [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
4348 (set_attr "mmx_isa" "native,*,*,*")
4349 (set_attr "type" "mmxadd,sselog,sselog,sselog")
4350 (set_attr "mode" "DI,TI,TI,TI")])
4351
4352 (define_insn "*andnot<mode>3"
4353 [(set (match_operand:VI_16_32 0 "register_operand" "=?&r,?r,x,x,v")
4354 (and:VI_16_32
4355 (not:VI_16_32
4356 (match_operand:VI_16_32 1 "register_operand" "0,r,0,x,v"))
4357 (match_operand:VI_16_32 2 "register_operand" "r,r,x,x,v")))
4358 (clobber (reg:CC FLAGS_REG))]
4359 ""
4360 "#"
4361 [(set_attr "isa" "*,bmi,sse2_noavx,avx,avx512vl")
4362 (set_attr "type" "alu,bitmanip,sselog,sselog,sselog")
4363 (set_attr "mode" "SI,SI,TI,TI,TI")])
4364
4365 (define_split
4366 [(set (match_operand:VI_16_32 0 "general_reg_operand")
4367 (and:VI_16_32
4368 (not:VI_16_32 (match_operand:VI_16_32 1 "general_reg_operand"))
4369 (match_operand:VI_16_32 2 "general_reg_operand")))
4370 (clobber (reg:CC FLAGS_REG))]
4371 "TARGET_BMI && reload_completed"
4372 [(parallel
4373 [(set (match_dup 0)
4374 (and:SI (not:SI (match_dup 1)) (match_dup 2)))
4375 (clobber (reg:CC FLAGS_REG))])]
4376 {
4377 operands[2] = lowpart_subreg (SImode, operands[2], <MODE>mode);
4378 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
4379 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
4380 })
4381
4382 (define_split
4383 [(set (match_operand:VI_16_32 0 "general_reg_operand")
4384 (and:VI_16_32
4385 (not:VI_16_32 (match_operand:VI_16_32 1 "general_reg_operand"))
4386 (match_operand:VI_16_32 2 "general_reg_operand")))
4387 (clobber (reg:CC FLAGS_REG))]
4388 "!TARGET_BMI && reload_completed"
4389 [(set (match_dup 0)
4390 (not:SI (match_dup 1)))
4391 (parallel
4392 [(set (match_dup 0)
4393 (and:SI (match_dup 0) (match_dup 2)))
4394 (clobber (reg:CC FLAGS_REG))])]
4395 {
4396 operands[2] = lowpart_subreg (SImode, operands[2], <MODE>mode);
4397 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
4398 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
4399 })
4400
4401 (define_split
4402 [(set (match_operand:VI_16_32 0 "sse_reg_operand")
4403 (and:VI_16_32
4404 (not:VI_16_32 (match_operand:VI_16_32 1 "sse_reg_operand"))
4405 (match_operand:VI_16_32 2 "sse_reg_operand")))
4406 (clobber (reg:CC FLAGS_REG))]
4407 "TARGET_SSE2 && reload_completed"
4408 [(set (match_dup 0)
4409 (and:V16QI (not:V16QI (match_dup 1)) (match_dup 2)))]
4410 {
4411 operands[2] = lowpart_subreg (V16QImode, operands[2], <MODE>mode);
4412 operands[1] = lowpart_subreg (V16QImode, operands[1], <MODE>mode);
4413 operands[0] = lowpart_subreg (V16QImode, operands[0], <MODE>mode);
4414 })
4415
4416 (define_expand "mmx_<code><mode>3"
4417 [(set (match_operand:MMXMODEI 0 "register_operand")
4418 (any_logic:MMXMODEI
4419 (match_operand:MMXMODEI 1 "register_mmxmem_operand")
4420 (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
4421 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4422 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4423
4424 (define_expand "<code><mode>3"
4425 [(set (match_operand:MMXMODEI 0 "register_operand")
4426 (any_logic:MMXMODEI
4427 (match_operand:MMXMODEI 1 "register_operand")
4428 (match_operand:MMXMODEI 2 "register_operand")))]
4429 "TARGET_MMX_WITH_SSE")
4430
4431 (define_insn "*mmx_<code><mode>3"
4432 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
4433 (any_logic:MMXMODEI
4434 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x,v")
4435 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
4436 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4437 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4438 "@
4439 p<logic>\t{%2, %0|%0, %2}
4440 p<logic>\t{%2, %0|%0, %2}
4441 vp<logic>\t{%2, %1, %0|%0, %1, %2}
4442 vp<logic>d\t{%2, %1, %0|%0, %1, %2}"
4443 [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
4444 (set_attr "mmx_isa" "native,*,*,*")
4445 (set_attr "type" "mmxadd,sselog,sselog,sselog")
4446 (set_attr "mode" "DI,TI,TI,TI")])
4447
4448 (define_expand "<code><mode>3"
4449 [(set (match_operand:VI_16_32 0 "nonimmediate_operand")
4450 (any_logic:VI_16_32
4451 (match_operand:VI_16_32 1 "nonimmediate_operand")
4452 (match_operand:VI_16_32 2 "nonimmediate_or_x86_64_const_vector_operand")))]
4453 ""
4454 "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
4455
4456 (define_insn "*<code><mode>3"
4457 [(set (match_operand:VI_16_32 0 "nonimmediate_operand" "=?r,m,x,x,v")
4458 (any_logic:VI_16_32
4459 (match_operand:VI_16_32 1 "nonimmediate_operand" "%0,0,0,x,v")
4460 (match_operand:VI_16_32 2 "nonimmediate_or_x86_64_const_vector_operand" "r,i,x,x,v")))
4461 (clobber (reg:CC FLAGS_REG))]
4462 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4463 "#"
4464 [(set_attr "isa" "*,*,sse2_noavx,avx,avx512vl")
4465 (set_attr "type" "alu,alu,sselog,sselog,sselog")
4466 (set_attr "mode" "SI,SI,TI,TI,TI")])
4467
4468 (define_split
4469 [(set (match_operand:VI_16_32 0 "nonimmediate_gr_operand")
4470 (any_logic:VI_16_32
4471 (match_operand:VI_16_32 1 "nonimmediate_gr_operand")
4472 (match_operand:VI_16_32 2 "reg_or_const_vector_operand")))
4473 (clobber (reg:CC FLAGS_REG))]
4474 "reload_completed"
4475 [(parallel
4476 [(set (match_dup 0)
4477 (any_logic:<mmxinsnmode> (match_dup 1) (match_dup 2)))
4478 (clobber (reg:CC FLAGS_REG))])]
4479 {
4480 if (!register_operand (operands[2], <MODE>mode))
4481 {
4482 HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[2],
4483 <MODE>mode);
4484 operands[2] = GEN_INT (val);
4485 }
4486 else
4487 operands[2] = lowpart_subreg (<mmxinsnmode>mode, operands[2], <MODE>mode);
4488 operands[1] = lowpart_subreg (<mmxinsnmode>mode, operands[1], <MODE>mode);
4489 operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
4490 })
4491
4492 (define_split
4493 [(set (match_operand:VI_16_32 0 "sse_reg_operand")
4494 (any_logic:VI_16_32
4495 (match_operand:VI_16_32 1 "sse_reg_operand")
4496 (match_operand:VI_16_32 2 "sse_reg_operand")))
4497 (clobber (reg:CC FLAGS_REG))]
4498 "TARGET_SSE2 && reload_completed"
4499 [(set (match_dup 0)
4500 (any_logic:V16QI (match_dup 1) (match_dup 2)))]
4501 {
4502 operands[2] = lowpart_subreg (V16QImode, operands[2], <MODE>mode);
4503 operands[1] = lowpart_subreg (V16QImode, operands[1], <MODE>mode);
4504 operands[0] = lowpart_subreg (V16QImode, operands[0], <MODE>mode);
4505 })
4506
4507 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4508 ;;
4509 ;; Parallel integral element swizzling
4510 ;;
4511 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4512
4513 (define_insn_and_split "mmx_packsswb"
4514 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
4515 (vec_concat:V8QI
4516 (ss_truncate:V4QI
4517 (match_operand:V4HI 1 "register_operand" "0,0,Yw"))
4518 (ss_truncate:V4QI
4519 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
4520 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4521 "@
4522 packsswb\t{%2, %0|%0, %2}
4523 #
4524 #"
4525 "&& reload_completed
4526 && SSE_REGNO_P (REGNO (operands[0]))"
4527 [(const_int 0)]
4528 "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
4529 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4530 (set_attr "type" "mmxshft,sselog,sselog")
4531 (set_attr "mode" "DI,TI,TI")])
4532
4533 ;; This instruction performs unsigned saturation of a signed source
4534 ;; and is different from the generic us_truncate RTX.
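;; For example, the source word 0xfffb is treated as signed -5 and packs
;; to 0, and 0x012c (300) packs to 0xff, while a generic us_truncate
;; would read 0xfffb as 65531 and saturate it to 0xff instead.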
4535 (define_insn_and_split "mmx_packuswb"
4536 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
4537 (unspec:V8QI
4538 [(match_operand:V4HI 1 "register_operand" "0,0,Yw")
4539 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")]
4540 UNSPEC_US_TRUNCATE))]
4541 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4542 "@
4543 packuswb\t{%2, %0|%0, %2}
4544 #
4545 #"
4546 "&& reload_completed
4547 && SSE_REGNO_P (REGNO (operands[0]))"
4548 [(const_int 0)]
4549 "ix86_split_mmx_pack (operands, US_TRUNCATE); DONE;"
4550 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4551 (set_attr "type" "mmxshft,sselog,sselog")
4552 (set_attr "mode" "DI,TI,TI")])
4553
4554 (define_insn_and_split "mmx_packssdw"
4555 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
4556 (vec_concat:V4HI
4557 (ss_truncate:V2HI
4558 (match_operand:V2SI 1 "register_operand" "0,0,Yw"))
4559 (ss_truncate:V2HI
4560 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
4561 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4562 "@
4563 packssdw\t{%2, %0|%0, %2}
4564 #
4565 #"
4566 "&& reload_completed
4567 && SSE_REGNO_P (REGNO (operands[0]))"
4568 [(const_int 0)]
4569 "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
4570 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4571 (set_attr "type" "mmxshft,sselog,sselog")
4572 (set_attr "mode" "DI,TI,TI")])
4573
4574 (define_insn_and_split "mmx_packusdw"
4575 [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw")
4576 (unspec:V4HI
4577 [(match_operand:V2SI 1 "register_operand" "0,0,Yw")
4578 (match_operand:V2SI 2 "register_operand" "Yr,*x,Yw")]
4579 UNSPEC_US_TRUNCATE))]
4580 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4581 "#"
4582 "&& reload_completed"
4583 [(const_int 0)]
4584 "ix86_split_mmx_pack (operands, US_TRUNCATE); DONE;"
4585 [(set_attr "isa" "noavx,noavx,avx")
4586 (set_attr "type" "sselog")
4587 (set_attr "mode" "TI")])
4588
4589 (define_insn_and_split "mmx_punpckhbw"
4590 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
4591 (vec_select:V8QI
4592 (vec_concat:V16QI
4593 (match_operand:V8QI 1 "register_operand" "0,0,Yw")
4594 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
4595 (parallel [(const_int 4) (const_int 12)
4596 (const_int 5) (const_int 13)
4597 (const_int 6) (const_int 14)
4598 (const_int 7) (const_int 15)])))]
4599 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4600 "@
4601 punpckhbw\t{%2, %0|%0, %2}
4602 #
4603 #"
4604 "&& reload_completed
4605 && SSE_REGNO_P (REGNO (operands[0]))"
4606 [(const_int 0)]
4607 "ix86_split_mmx_punpck (operands, true); DONE;"
4608 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4609 (set_attr "type" "mmxcvt,sselog,sselog")
4610 (set_attr "mode" "DI,TI,TI")])
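
;; Illustrative note: the selection above interleaves the high halves of
;; the two sources, i.e. {a0..a7}, {b0..b7} -> {a4,b4,a5,b5,a6,b6,a7,b7};
;; the corresponding punpcklbw patterns interleave the low halves the
;; same way.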
4611
4612 (define_insn_and_split "mmx_punpckhbw_low"
4613 [(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
4614 (vec_select:V4QI
4615 (vec_concat:V8QI
4616 (match_operand:V4QI 1 "register_operand" "0,Yw")
4617 (match_operand:V4QI 2 "register_operand" "x,Yw"))
4618 (parallel [(const_int 2) (const_int 6)
4619 (const_int 3) (const_int 7)])))]
4620 "TARGET_SSE2"
4621 "#"
4622 "&& reload_completed"
4623 [(const_int 0)]
4624 "ix86_split_mmx_punpck (operands, true); DONE;"
4625 [(set_attr "isa" "noavx,avx")
4626 (set_attr "type" "sselog")
4627 (set_attr "mode" "TI")])
4628
4629 (define_insn_and_split "mmx_punpcklbw"
4630 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
4631 (vec_select:V8QI
4632 (vec_concat:V16QI
4633 (match_operand:V8QI 1 "register_operand" "0,0,Yw")
4634 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
4635 (parallel [(const_int 0) (const_int 8)
4636 (const_int 1) (const_int 9)
4637 (const_int 2) (const_int 10)
4638 (const_int 3) (const_int 11)])))]
4639 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4640 "@
4641 punpcklbw\t{%2, %0|%0, %k2}
4642 #
4643 #"
4644 "&& reload_completed
4645 && SSE_REGNO_P (REGNO (operands[0]))"
4646 [(const_int 0)]
4647 "ix86_split_mmx_punpck (operands, false); DONE;"
4648 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4649 (set_attr "type" "mmxcvt,sselog,sselog")
4650 (set_attr "mode" "DI,TI,TI")])
4651
4652 (define_insn_and_split "mmx_punpcklbw_low"
4653 [(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
4654 (vec_select:V4QI
4655 (vec_concat:V8QI
4656 (match_operand:V4QI 1 "register_operand" "0,Yw")
4657 (match_operand:V4QI 2 "register_operand" "x,Yw"))
4658 (parallel [(const_int 0) (const_int 4)
4659 (const_int 1) (const_int 5)])))]
4660 "TARGET_SSE2"
4661 "#"
4662 "&& reload_completed"
4663 [(const_int 0)]
4664 "ix86_split_mmx_punpck (operands, false); DONE;"
4665 [(set_attr "isa" "noavx,avx")
4666 (set_attr "type" "sselog")
4667 (set_attr "mode" "TI")])
4668
4669 (define_insn_and_split "mmx_punpckhwd"
4670 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
4671 (vec_select:V4HI
4672 (vec_concat:V8HI
4673 (match_operand:V4HI 1 "register_operand" "0,0,Yw")
4674 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
4675 (parallel [(const_int 2) (const_int 6)
4676 (const_int 3) (const_int 7)])))]
4677 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4678 "@
4679 punpckhwd\t{%2, %0|%0, %2}
4680 #
4681 #"
4682 "&& reload_completed
4683 && SSE_REGNO_P (REGNO (operands[0]))"
4684 [(const_int 0)]
4685 "ix86_split_mmx_punpck (operands, true); DONE;"
4686 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4687 (set_attr "type" "mmxcvt,sselog,sselog")
4688 (set_attr "mode" "DI,TI,TI")])
4689
4690 (define_insn_and_split "mmx_punpcklwd"
4691 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
4692 (vec_select:V4HI
4693 (vec_concat:V8HI
4694 (match_operand:V4HI 1 "register_operand" "0,0,Yw")
4695 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
4696 (parallel [(const_int 0) (const_int 4)
4697 (const_int 1) (const_int 5)])))]
4698 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4699 "@
4700 punpcklwd\t{%2, %0|%0, %k2}
4701 #
4702 #"
4703 "&& reload_completed
4704 && SSE_REGNO_P (REGNO (operands[0]))"
4705 [(const_int 0)]
4706 "ix86_split_mmx_punpck (operands, false); DONE;"
4707 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4708 (set_attr "type" "mmxcvt,sselog,sselog")
4709 (set_attr "mode" "DI,TI,TI")])
4710
4711 (define_insn_and_split "mmx_punpckhdq"
4712 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
4713 (vec_select:V2SI
4714 (vec_concat:V4SI
4715 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
4716 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
4717 (parallel [(const_int 1)
4718 (const_int 3)])))]
4719 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4720 "@
4721 punpckhdq\t{%2, %0|%0, %2}
4722 #
4723 #"
4724 "&& reload_completed
4725 && SSE_REGNO_P (REGNO (operands[0]))"
4726 [(const_int 0)]
4727 "ix86_split_mmx_punpck (operands, true); DONE;"
4728 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4729 (set_attr "type" "mmxcvt,sselog,sselog")
4730 (set_attr "mode" "DI,TI,TI")])
4731
4732 (define_insn_and_split "mmx_punpckldq"
4733 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
4734 (vec_select:V2SI
4735 (vec_concat:V4SI
4736 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
4737 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
4738 (parallel [(const_int 0)
4739 (const_int 2)])))]
4740 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4741 "@
4742 punpckldq\t{%2, %0|%0, %k2}
4743 #
4744 #"
4745 "&& reload_completed
4746 && SSE_REGNO_P (REGNO (operands[0]))"
4747 [(const_int 0)]
4748 "ix86_split_mmx_punpck (operands, false); DONE;"
4749 [(set_attr "mmx_isa" "native,sse_noavx,avx")
4750 (set_attr "type" "mmxcvt,sselog,sselog")
4751 (set_attr "mode" "DI,TI,TI")])
4752
4753 (define_insn "sse4_1_<code>v4qiv4hi2"
4754 [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw")
4755 (any_extend:V4HI
4756 (vec_select:V4QI
4757 (match_operand:V8QI 1 "register_operand" "Yr,*x,Yw")
4758 (parallel [(const_int 0) (const_int 1)
4759 (const_int 2) (const_int 3)]))))]
4760 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4761 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
4762 [(set_attr "isa" "noavx,noavx,avx")
4763 (set_attr "type" "ssemov")
4764 (set_attr "prefix_extra" "1")
4765 (set_attr "prefix" "orig,orig,maybe_evex")
4766 (set_attr "mode" "TI")])
4767
4768 (define_expand "<insn>v4qiv4hi2"
4769 [(set (match_operand:V4HI 0 "register_operand")
4770 (any_extend:V4HI
4771 (match_operand:V4QI 1 "register_operand")))]
4772 "TARGET_MMX_WITH_SSE"
4773 {
4774 if (!TARGET_SSE4_1)
4775 {
4776 ix86_expand_sse_extend (operands[0], operands[1], <u_bool>);
4777 DONE;
4778 }
4779
4780 rtx op1 = force_reg (V4QImode, operands[1]);
4781 op1 = lowpart_subreg (V8QImode, op1, V4QImode);
4782 emit_insn (gen_sse4_1_<code>v4qiv4hi2 (operands[0], op1));
4783 DONE;
4784 })
4785
4786 (define_insn "sse4_1_<code>v2hiv2si2"
4787 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
4788 (any_extend:V2SI
4789 (vec_select:V2HI
4790 (match_operand:V4HI 1 "register_operand" "Yr,*x,v")
4791 (parallel [(const_int 0) (const_int 1)]))))]
4792 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4793 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
4794 [(set_attr "isa" "noavx,noavx,avx")
4795 (set_attr "type" "ssemov")
4796 (set_attr "prefix_extra" "1")
4797 (set_attr "prefix" "orig,orig,maybe_evex")
4798 (set_attr "mode" "TI")])
4799
4800 (define_expand "<insn>v2hiv2si2"
4801 [(set (match_operand:V2SI 0 "register_operand")
4802 (any_extend:V2SI
4803 (match_operand:V2HI 1 "register_operand")))]
4804 "TARGET_MMX_WITH_SSE"
4805 {
4806 if (!TARGET_SSE4_1)
4807 {
4808 ix86_expand_sse_extend (operands[0], operands[1], <u_bool>);
4809 DONE;
4810 }
4811
4812 rtx op1 = force_reg (V2HImode, operands[1]);
4813 op1 = lowpart_subreg (V4HImode, op1, V2HImode);
4814 emit_insn (gen_sse4_1_<code>v2hiv2si2 (operands[0], op1));
4815 DONE;
4816 })
4817
4818 (define_insn "sse4_1_<code>v2qiv2si2"
4819 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
4820 (any_extend:V2SI
4821 (vec_select:V2QI
4822 (match_operand:V4QI 1 "register_operand" "Yr,*x,v")
4823 (parallel [(const_int 0) (const_int 1)]))))]
4824 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4825 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
4826 [(set_attr "isa" "noavx,noavx,avx")
4827 (set_attr "type" "ssemov")
4828 (set_attr "prefix_extra" "1")
4829 (set_attr "prefix" "orig,orig,maybe_evex")
4830 (set_attr "mode" "TI")])
4831
4832 (define_expand "<insn>v2qiv2si2"
4833 [(set (match_operand:V2SI 0 "register_operand")
4834 (any_extend:V2SI
4835 (match_operand:V2QI 1 "register_operand")))]
4836 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4837 {
4838 rtx op1 = force_reg (V2QImode, operands[1]);
4839 op1 = lowpart_subreg (V4QImode, op1, V2QImode);
4840 emit_insn (gen_sse4_1_<code>v2qiv2si2 (operands[0], op1));
4841 DONE;
4842 })
4843
4844 (define_insn "sse4_1_<code>v2qiv2hi2"
4845 [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yw")
4846 (any_extend:V2HI
4847 (vec_select:V2QI
4848 (match_operand:V4QI 1 "register_operand" "Yr,*x,Yw")
4849 (parallel [(const_int 0) (const_int 1)]))))]
4850 "TARGET_SSE4_1"
4851 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
4852 [(set_attr "isa" "noavx,noavx,avx")
4853 (set_attr "type" "ssemov")
4854 (set_attr "prefix_extra" "1")
4855 (set_attr "prefix" "orig,orig,maybe_evex")
4856 (set_attr "mode" "TI")])
4857
4858 (define_expand "<insn>v2qiv2hi2"
4859 [(set (match_operand:V2HI 0 "register_operand")
4860 (any_extend:V2HI
4861 (match_operand:V2QI 1 "register_operand")))]
4862 "TARGET_SSE2"
4863 {
4864 if (!TARGET_SSE4_1)
4865 {
4866 ix86_expand_sse_extend (operands[0], operands[1], <u_bool>);
4867 DONE;
4868 }
4869
4870 rtx op1 = force_reg (V2QImode, operands[1]);
4871 op1 = lowpart_subreg (V4QImode, op1, V2QImode);
4872 emit_insn (gen_sse4_1_<code>v2qiv2hi2 (operands[0], op1));
4873 DONE;
4874 })
4875
4876 (define_insn "truncv2hiv2qi2"
4877 [(set (match_operand:V2QI 0 "register_operand" "=v")
4878 (truncate:V2QI
4879 (match_operand:V2HI 1 "register_operand" "v")))]
4880 "TARGET_AVX512VL && TARGET_AVX512BW"
4881 "vpmovwb\t{%1, %0|%0, %1}"
4882 [(set_attr "type" "ssemov")
4883 (set_attr "prefix" "evex")
4884 (set_attr "mode" "TI")])
4885
4886 (define_mode_iterator V2QI_V2HI [V2QI V2HI])
4887 (define_insn "truncv2si<mode>2"
4888 [(set (match_operand:V2QI_V2HI 0 "register_operand" "=v")
4889 (truncate:V2QI_V2HI
4890 (match_operand:V2SI 1 "register_operand" "v")))]
4891 "TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
4892 "vpmovd<mmxvecsize>\t{%1, %0|%0, %1}"
4893 [(set_attr "type" "ssemov")
4894 (set_attr "prefix" "evex")
4895 (set_attr "mode" "TI")])
4896
4897 ;; Pack/unpack vector modes
4898 (define_mode_attr mmxpackmode
4899 [(V4HI "V8QI") (V2SI "V4HI")])
4900
4901 (define_expand "vec_pack_trunc_<mode>"
4902 [(match_operand:<mmxpackmode> 0 "register_operand")
4903 (match_operand:MMXMODE24 1 "register_operand")
4904 (match_operand:MMXMODE24 2 "register_operand")]
4905 "TARGET_MMX_WITH_SSE"
4906 {
4907 rtx op1 = gen_lowpart (<mmxpackmode>mode, operands[1]);
4908 rtx op2 = gen_lowpart (<mmxpackmode>mode, operands[2]);
4909 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
4910 DONE;
4911 })
4912
4913 (define_expand "vec_pack_trunc_v2hi"
4914 [(match_operand:V4QI 0 "register_operand")
4915 (match_operand:V2HI 1 "register_operand")
4916 (match_operand:V2HI 2 "register_operand")]
4917 "TARGET_SSE2"
4918 {
4919 rtx op1 = gen_lowpart (V4QImode, operands[1]);
4920 rtx op2 = gen_lowpart (V4QImode, operands[2]);
4921 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
4922 DONE;
4923 })
4924
4925 (define_mode_attr mmxunpackmode
4926 [(V8QI "V4HI") (V4HI "V2SI")])
4927
4928 (define_expand "vec_unpacks_lo_<mode>"
4929 [(match_operand:<mmxunpackmode> 0 "register_operand")
4930 (match_operand:MMXMODE12 1 "register_operand")]
4931 "TARGET_MMX_WITH_SSE"
4932 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
4933
4934 (define_expand "vec_unpacks_hi_<mode>"
4935 [(match_operand:<mmxunpackmode> 0 "register_operand")
4936 (match_operand:MMXMODE12 1 "register_operand")]
4937 "TARGET_MMX_WITH_SSE"
4938 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
4939
4940 (define_expand "vec_unpacku_lo_<mode>"
4941 [(match_operand:<mmxunpackmode> 0 "register_operand")
4942 (match_operand:MMXMODE12 1 "register_operand")]
4943 "TARGET_MMX_WITH_SSE"
4944 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
4945
4946 (define_expand "vec_unpacku_hi_<mode>"
4947 [(match_operand:<mmxunpackmode> 0 "register_operand")
4948 (match_operand:MMXMODE12 1 "register_operand")]
4949 "TARGET_MMX_WITH_SSE"
4950 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
4951
4952 (define_expand "vec_unpacks_lo_v4qi"
4953 [(match_operand:V2HI 0 "register_operand")
4954 (match_operand:V4QI 1 "register_operand")]
4955 "TARGET_SSE2"
4956 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
4957
4958 (define_expand "vec_unpacks_hi_v4qi"
4959 [(match_operand:V2HI 0 "register_operand")
4960 (match_operand:V4QI 1 "register_operand")]
4961 "TARGET_SSE2"
4962 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
4963
4964 (define_expand "vec_unpacku_lo_v4qi"
4965 [(match_operand:V2HI 0 "register_operand")
4966 (match_operand:V4QI 1 "register_operand")]
4967 "TARGET_SSE2"
4968 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
4969
4970 (define_expand "vec_unpacku_hi_v4qi"
4971 [(match_operand:V2HI 0 "register_operand")
4972 (match_operand:V4QI 1 "register_operand")]
4973 "TARGET_SSE2"
4974 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
4975
4976 (define_insn "*mmx_pinsrd"
4977 [(set (match_operand:V2SI 0 "register_operand" "=x,Yv")
4978 (vec_merge:V2SI
4979 (vec_duplicate:V2SI
4980 (match_operand:SI 2 "nonimmediate_operand" "jrjm,rm"))
4981 (match_operand:V2SI 1 "register_operand" "0,Yv")
4982 (match_operand:SI 3 "const_int_operand")))]
4983 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE
4984 && ((unsigned) exact_log2 (INTVAL (operands[3]))
4985 < GET_MODE_NUNITS (V2SImode))"
4986 {
4987 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4988 switch (which_alternative)
4989 {
4990 case 1:
4991 return "vpinsrd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4992 case 0:
4993 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4994 default:
4995 gcc_unreachable ();
4996 }
4997 }
4998 [(set_attr "isa" "noavx,avx")
4999 (set_attr "addr" "gpr16,*")
5000 (set_attr "prefix_extra" "1")
5001 (set_attr "type" "sselog")
5002 (set_attr "length_immediate" "1")
5003 (set_attr "prefix" "orig,vex")
5004 (set_attr "mode" "TI")])
5005
5006 (define_insn "*mmx_pinsrw"
5007 [(set (match_operand:V4FI_64 0 "register_operand" "=y,x,YW,&x")
5008 (vec_merge:V4FI_64
5009 (vec_duplicate:V4FI_64
5010 (match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,rm,x"))
5011 (match_operand:V4FI_64 1 "register_operand" "0,0,YW,x")
5012 (match_operand:SI 3 "const_int_operand")))]
5013 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5014 && (TARGET_SSE || TARGET_3DNOW_A)
5015 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5016 < GET_MODE_NUNITS (V4HImode))"
5017 {
5018 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5019 switch (which_alternative)
5020 {
5021 case 3:
5022 return "#";
5023 case 2:
5024 if (MEM_P (operands[2]))
5025 return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5026 else
5027 return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
5028 case 1:
5029 case 0:
5030 if (MEM_P (operands[2]))
5031 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
5032 else
5033 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
5034 default:
5035 gcc_unreachable ();
5036 }
5037 }
5038 [(set_attr "isa" "*,sse2_noavx,avx,sse4")
5039 (set_attr "mmx_isa" "native,*,*,*")
5040 (set_attr "type" "mmxcvt,sselog,sselog,sselog")
5041 (set_attr "length_immediate" "1")
5042 (set_attr "mode" "DI,TI,TI,TI")])
5043
5044 ;; For TARGET_SSE4_1, implement insert from XMM reg with PSHUFLW + PBLENDW.
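;; As an illustrative sketch (hypothetical registers, not taken from
;; compiler output), inserting a 16-bit scalar from %xmm2 into element 1
;; of %xmm1 becomes:
;;	vpshuflw	$0, %xmm2, %xmm0
;;	vpblendw	$0xd, %xmm1, %xmm0, %xmm0
;; where the PBLENDW immediate is the inverted vec_merge mask,
;; ~(1 << 1) & 0xf = 0xd, so every lane except lane 1 is taken from the
;; original vector.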
5045 (define_split
5046 [(set (match_operand:V4FI_64 0 "sse_reg_operand")
5047 (vec_merge:V4FI_64
5048 (vec_duplicate:V4FI_64
5049 (match_operand:<mmxscalarmode> 2 "sse_reg_operand"))
5050 (match_operand:V4FI_64 1 "sse_reg_operand")
5051 (match_operand:SI 3 "const_int_operand")))]
5052 "TARGET_MMX_WITH_SSE && TARGET_SSE4_1 && reload_completed
5053 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5054 < GET_MODE_NUNITS (<MODE>mode))"
5055 [(set (match_dup 0)
5056 (vec_duplicate:V4FI_64 (match_dup 2)))
5057 (set (match_dup 0)
5058 (vec_merge:V4FI_64 (match_dup 1) (match_dup 0) (match_dup 3)))]
5059 "operands[3] = GEN_INT (~INTVAL (operands[3]) & 0xf);")
5060
5061 (define_insn "*mmx_pinsrb"
5062 [(set (match_operand:V8QI 0 "register_operand" "=x,YW")
5063 (vec_merge:V8QI
5064 (vec_duplicate:V8QI
5065 (match_operand:QI 2 "nonimmediate_operand" "jrjm,rm"))
5066 (match_operand:V8QI 1 "register_operand" "0,YW")
5067 (match_operand:SI 3 "const_int_operand")))]
5068 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE
5069 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5070 < GET_MODE_NUNITS (V8QImode))"
5071 {
5072 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5073 switch (which_alternative)
5074 {
5075 case 1:
5076 if (MEM_P (operands[2]))
5077 return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5078 else
5079 return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
5080 case 0:
5081 if (MEM_P (operands[2]))
5082 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
5083 else
5084 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
5085 default:
5086 gcc_unreachable ();
5087 }
5088 }
5089 [(set_attr "isa" "noavx,avx")
5090 (set_attr "type" "sselog")
5091 (set_attr "addr" "gpr16,*")
5092 (set_attr "prefix_extra" "1")
5093 (set_attr "length_immediate" "1")
5094 (set_attr "prefix" "orig,vex")
5095 (set_attr "mode" "TI")])
5096
5097 (define_insn "*mmx_pextrw"
5098 [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,r,jm,m")
5099 (vec_select:HI
5100 (match_operand:V4HI 1 "register_operand" "y,YW,YW,YW")
5101 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
5102 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5103 && (TARGET_SSE || TARGET_3DNOW_A)"
5104 "@
5105 pextrw\t{%2, %1, %k0|%k0, %1, %2}
5106 %vpextrw\t{%2, %1, %k0|%k0, %1, %2}
5107 pextrw\t{%2, %1, %0|%0, %1, %2}
5108 vpextrw\t{%2, %1, %0|%0, %1, %2}"
5109 [(set_attr "isa" "*,sse2,sse4_noavx,avx")
5110 (set_attr "addr" "*,*,gpr16,*")
5111 (set_attr "mmx_isa" "native,*,*,*")
5112 (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1")
5113 (set_attr "length_immediate" "1")
5114 (set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex")
5115 (set_attr "mode" "DI,TI,TI,TI")])
5116
5117 (define_insn "*mmx_pextrw<mode>"
5118 [(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,?r,jm,m,x,Yw")
5119 (vec_select:<mmxscalarmode>
5120 (match_operand:V4F_64 1 "register_operand" "y,YW,YW,YW,0,YW")
5121 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
5122 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5123 && (TARGET_SSE || TARGET_3DNOW_A)"
5124 {
5125 switch (which_alternative)
5126 {
5127 case 0:
return "pextrw\t{%2, %1, %k0|%k0, %1, %2}";
5128 case 1:
5129 return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
5130 case 2:
5131 case 3:
5132 return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
5133 case 4:
5134 operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
5135 return "psrldq\t{%2, %0|%0, %2}";
5136 case 5:
5137 operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
5138 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5139
5140 default:
5141 gcc_unreachable ();
5142 }
5143 }
5144 [(set_attr "isa" "*,sse2,sse4_noavx,avx,noavx,avx")
5145 (set_attr "addr" "*,*,gpr16,*,*,*")
5146 (set_attr "mmx_isa" "native,*,*,*,*,*")
5147 (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,sseishft1,sseishft1")
5148 (set_attr "length_immediate" "1")
5149 (set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex,orig,maybe_evex")
5150 (set_attr "mode" "DI,TI,TI,TI,TI,TI")])
5151
5152 (define_insn "*mmx_pextrw_zext"
5153 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5154 (zero_extend:SWI48
5155 (vec_select:HI
5156 (match_operand:V4HI 1 "register_operand" "y,YW")
5157 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
5158 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5159 && (TARGET_SSE || TARGET_3DNOW_A)"
5160 "@
5161 pextrw\t{%2, %1, %k0|%k0, %1, %2}
5162 %vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
5163 [(set_attr "isa" "*,sse2")
5164 (set_attr "mmx_isa" "native,*")
5165 (set_attr "type" "mmxcvt,sselog1")
5166 (set_attr "length_immediate" "1")
5167 (set_attr "prefix" "orig,maybe_vex")
5168 (set_attr "mode" "DI,TI")])
5169
5170 (define_insn "*mmx_pextrb"
5171 [(set (match_operand:QI 0 "nonimmediate_operand" "=jr,jm,r,m")
5172 (vec_select:QI
5173 (match_operand:V8QI 1 "register_operand" "YW,YW,YW,YW")
5174 (parallel [(match_operand:SI 2 "const_0_to_7_operand")])))]
5175 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
5176 "@
5177 pextrb\t{%2, %1, %k0|%k0, %1, %2}
5178 pextrb\t{%2, %1, %0|%0, %1, %2}
5179 vpextrb\t{%2, %1, %k0|%k0, %1, %2}
5180 vpextrb\t{%2, %1, %0|%0, %1, %2}"
5181 [(set_attr "isa" "noavx,noavx,avx,avx")
5182 (set_attr "addr" "*,gpr16,*,*")
5183 (set_attr "type" "sselog1")
5184 (set_attr "prefix_extra" "1")
5185 (set_attr "length_immediate" "1")
5186 (set_attr "prefix" "maybe_vex")
5187 (set_attr "mode" "TI")])
5188
5189 (define_insn "*mmx_pextrb_zext"
5190 [(set (match_operand:SWI248 0 "register_operand" "=jr,r")
5191 (zero_extend:SWI248
5192 (vec_select:QI
5193 (match_operand:V8QI 1 "register_operand" "YW,YW")
5194 (parallel [(match_operand:SI 2 "const_0_to_7_operand")]))))]
5195 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
5196 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
5197 [(set_attr "isa" "noavx,avx")
5198 (set_attr "type" "sselog1")
5199 (set_attr "prefix_extra" "1")
5200 (set_attr "length_immediate" "1")
5201 (set_attr "prefix" "maybe_vex")
5202 (set_attr "mode" "TI")])
5203
5204 (define_insn "mmx_pshufbv8qi3"
5205 [(set (match_operand:V8QI 0 "register_operand" "=x,Yw")
5206 (unspec:V8QI
5207 [(match_operand:V8QI 1 "register_operand" "0,Yw")
5208 (match_operand:V16QI 2 "vector_operand" "xja,Ywm")]
5209 UNSPEC_PSHUFB))]
5210 "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
5211 "@
5212 pshufb\t{%2, %0|%0, %2}
5213 vpshufb\t{%2, %1, %0|%0, %1, %2}"
5214 [(set_attr "isa" "noavx,avx")
5215 (set_attr "addr" "gpr16,*")
5216 (set_attr "type" "sselog1")
5217 (set_attr "prefix_extra" "1")
5218 (set_attr "prefix" "orig,maybe_evex")
5219 (set_attr "btver2_decode" "vector")
5220 (set_attr "mode" "TI")])
5221
5222 (define_insn "mmx_pshufbv4qi3"
5223 [(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
5224 (unspec:V4QI
5225 [(match_operand:V4QI 1 "register_operand" "0,Yw")
5226 (match_operand:V16QI 2 "vector_operand" "xja,Ywm")]
5227 UNSPEC_PSHUFB))]
5228 "TARGET_SSSE3"
5229 "@
5230 pshufb\t{%2, %0|%0, %2}
5231 vpshufb\t{%2, %1, %0|%0, %1, %2}"
5232 [(set_attr "isa" "noavx,avx")
5233 (set_attr "addr" "gpr16,*")
5234 (set_attr "type" "sselog1")
5235 (set_attr "prefix_extra" "1")
5236 (set_attr "prefix" "orig,maybe_evex")
5237 (set_attr "btver2_decode" "vector")
5238 (set_attr "mode" "TI")])
5239
5240 (define_expand "mmx_pshufw"
5241 [(match_operand:V4HI 0 "register_operand")
5242 (match_operand:V4HI 1 "register_mmxmem_operand")
5243 (match_operand:SI 2 "const_int_operand")]
5244 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5245 && (TARGET_SSE || TARGET_3DNOW_A)"
5246 {
5247 int mask = INTVAL (operands[2]);
5248 emit_insn (gen_mmx_pshufwv4hi_1 (operands[0], operands[1],
5249 GEN_INT ((mask >> 0) & 3),
5250 GEN_INT ((mask >> 2) & 3),
5251 GEN_INT ((mask >> 4) & 3),
5252 GEN_INT ((mask >> 6) & 3)));
5253 DONE;
5254 })
5255
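;; The PSHUFW/PSHUFLW immediate packs four 2-bit lane selectors, lane 0
;; in bits 0-1 through lane 3 in bits 6-7.  For example, reversing the
;; four elements uses selectors 3,2,1,0, i.e. the immediate
;; 3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1b.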
5256 (define_insn "mmx_pshufw<mode>_1"
5257 [(set (match_operand:V4FI_64 0 "register_operand" "=y,Yw")
5258 (vec_select:V4FI_64
5259 (match_operand:V4FI_64 1 "register_mmxmem_operand" "ym,Yw")
5260 (parallel [(match_operand 2 "const_0_to_3_operand")
5261 (match_operand 3 "const_0_to_3_operand")
5262 (match_operand 4 "const_0_to_3_operand")
5263 (match_operand 5 "const_0_to_3_operand")])))]
5264 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5265 && (TARGET_SSE || TARGET_3DNOW_A)"
5266 {
5267 int mask = 0;
5268 mask |= INTVAL (operands[2]) << 0;
5269 mask |= INTVAL (operands[3]) << 2;
5270 mask |= INTVAL (operands[4]) << 4;
5271 mask |= INTVAL (operands[5]) << 6;
5272 operands[2] = GEN_INT (mask);
5273
5274 switch (which_alternative)
5275 {
5276 case 0:
5277 return "pshufw\t{%2, %1, %0|%0, %1, %2}";
5278 case 1:
5279 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
5280 default:
5281 gcc_unreachable ();
5282 }
5283 }
5284 [(set_attr "isa" "*,sse2")
5285 (set_attr "mmx_isa" "native,*")
5286 (set_attr "type" "mmxcvt,sselog1")
5287 (set_attr "length_immediate" "1")
5288 (set_attr "mode" "DI,TI")])
5289
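;; V2SI lives in the low half of an XMM register, so only the two low
;; selectors of the PSHUFD immediate vary; selectors 2 and 3 are kept in
;; place.  E.g. swapping the two elements gives
;; 1 | (0 << 2) | (2 << 4) | (3 << 6) = 0xe1, the immediate used by the
;; SSE alternative of pswapd further below.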
5290 (define_insn "*mmx_pshufd_1"
5291 [(set (match_operand:V2SI 0 "register_operand" "=Yv")
5292 (vec_select:V2SI
5293 (match_operand:V2SI 1 "register_operand" "Yv")
5294 (parallel [(match_operand 2 "const_0_to_1_operand")
5295 (match_operand 3 "const_0_to_1_operand")])))]
5296 "TARGET_MMX_WITH_SSE"
5297 {
5298 int mask = 0;
5299 mask |= INTVAL (operands[2]) << 0;
5300 mask |= INTVAL (operands[3]) << 2;
5301 mask |= 2 << 4;
5302 mask |= 3 << 6;
5303 operands[2] = GEN_INT (mask);
5304
5305 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
5306 }
5307 [(set_attr "type" "sselog1")
5308 (set_attr "prefix_data16" "1")
5309 (set_attr "length_immediate" "1")
5310 (set_attr "mode" "TI")])
5311
5312 (define_insn "*mmx_pblendw64"
5313 [(set (match_operand:V4FI_64 0 "register_operand" "=Yr,*x,x")
5314 (vec_merge:V4FI_64
5315 (match_operand:V4FI_64 2 "register_operand" "Yr,*x,x")
5316 (match_operand:V4FI_64 1 "register_operand" "0,0,x")
5317 (match_operand:SI 3 "const_0_to_15_operand")))]
5318 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
5319 "@
5320 pblendw\t{%3, %2, %0|%0, %2, %3}
5321 pblendw\t{%3, %2, %0|%0, %2, %3}
5322 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
5323 [(set_attr "isa" "noavx,noavx,avx")
5324 (set_attr "type" "ssemov")
5325 (set_attr "prefix_extra" "1")
5326 (set_attr "length_immediate" "1")
5327 (set_attr "prefix" "orig,orig,vex")
5328 (set_attr "mode" "TI")])
5329
5330 (define_insn "*mmx_pblendw32"
5331 [(set (match_operand:V2FI_32 0 "register_operand" "=Yr,*x,x")
5332 (vec_merge:V2FI_32
5333 (match_operand:V2FI_32 2 "register_operand" "Yr,*x,x")
5334 (match_operand:V2FI_32 1 "register_operand" "0,0,x")
5335 (match_operand:SI 3 "const_0_to_7_operand")))]
5336 "TARGET_SSE4_1"
5337 "@
5338 pblendw\t{%3, %2, %0|%0, %2, %3}
5339 pblendw\t{%3, %2, %0|%0, %2, %3}
5340 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
5341 [(set_attr "isa" "noavx,noavx,avx")
5342 (set_attr "type" "ssemov")
5343 (set_attr "prefix_extra" "1")
5344 (set_attr "length_immediate" "1")
5345 (set_attr "prefix" "orig,orig,vex")
5346 (set_attr "mode" "TI")])
5347
5348 ;; Optimize a V2SImode element swap performed directly on a memory
5349 ;; location into a DImode rotate of that memory by 32 bits.
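;; For illustration (hypothetical operand, not from compiler output),
;; the in-memory swap then becomes a single rotate such as:
;;	rolq	$32, (%rdi)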
5350 (define_split
5351 [(set (match_operand:V2SI 0 "memory_operand")
5352 (vec_select:V2SI (match_dup 0)
5353 (parallel [(const_int 1) (const_int 0)])))]
5354 "TARGET_64BIT && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
5355 [(set (match_dup 0)
5356 (rotate:DI (match_dup 0) (const_int 32)))]
5357 "operands[0] = adjust_address (operands[0], DImode, 0);")
5358
5359 (define_insn "mmx_pswapdv2si2"
5360 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5361 (vec_select:V2SI
5362 (match_operand:V2SI 1 "register_mmxmem_operand" "ym,Yv")
5363 (parallel [(const_int 1) (const_int 0)])))]
5364 "TARGET_3DNOW_A"
5365 "@
5366 pswapd\t{%1, %0|%0, %1}
5367 %vpshufd\t{$0xe1, %1, %0|%0, %1, 0xe1}"
5368 [(set_attr "isa" "*,sse2")
5369 (set_attr "mmx_isa" "native,*")
5370 (set_attr "type" "mmxcvt,sselog1")
5371 (set_attr "prefix_extra" "1,*")
5372 (set_attr "length_immediate" "*,1")
5373 (set_attr "mode" "DI,TI")])
5374
5375 (define_insn "*vec_dupv4hi"
5376 [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
5377 (vec_duplicate:V4HI
5378 (truncate:HI
5379 (match_operand:SI 1 "register_operand" "0,Yw"))))]
5380 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5381 && (TARGET_SSE || TARGET_3DNOW_A)"
5382 "@
5383 pshufw\t{$0, %0, %0|%0, %0, 0}
5384 %vpshuflw\t{$0, %1, %0|%0, %1, 0}"
5385 [(set_attr "isa" "*,sse2")
5386 (set_attr "mmx_isa" "native,*")
5387 (set_attr "type" "mmxcvt,sselog1")
5388 (set_attr "length_immediate" "1")
5389 (set_attr "mode" "DI,TI")])
5390
5391 (define_insn "*vec_dup<mode>"
5392 [(set (match_operand:V4FI_64 0 "register_operand" "=Yw")
5393 (vec_duplicate:V4FI_64
5394 (match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))]
5395 "TARGET_MMX_WITH_SSE"
5396 "%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
5397 [(set_attr "isa" "sse2")
5398 (set_attr "type" "sselog1")
5399 (set_attr "length_immediate" "1")
5400 (set_attr "mode" "TI")])
5401
5402 (define_insn "*vec_dupv2si"
5403 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5404 (vec_duplicate:V2SI
5405 (match_operand:SI 1 "register_operand" "0,Yv")))]
5406 "TARGET_MMX || TARGET_MMX_WITH_SSE"
5407 "@
5408 punpckldq\t%0, %0
5409 %vpshufd\t{$0xe0, %1, %0|%0, %1, 0xe0}"
5410 [(set_attr "isa" "*,sse2")
5411 (set_attr "mmx_isa" "native,*")
5412 (set_attr "type" "mmxcvt,sselog1")
5413 (set_attr "prefix_data16" "*,1")
5414 (set_attr "length_immediate" "*,1")
5415 (set_attr "mode" "DI,TI")])
5416
5417 (define_insn "*mmx_concatv2si"
5418 [(set (match_operand:V2SI 0 "register_operand" "=y,y")
5419 (vec_concat:V2SI
5420 (match_operand:SI 1 "nonimmediate_operand" " 0,rm")
5421 (match_operand:SI 2 "nonimm_or_0_operand" "ym,C")))]
5422 "TARGET_MMX && !TARGET_SSE"
5423 "@
5424 punpckldq\t{%2, %0|%0, %2}
5425 movd\t{%1, %0|%0, %1}"
5426 [(set_attr "type" "mmxcvt,mmxmov")
5427 (set_attr "mode" "DI")])
5428
5429 (define_expand "vec_setv2si"
5430 [(match_operand:V2SI 0 "register_operand")
5431 (match_operand:SI 1 "register_operand")
5432 (match_operand 2 "vec_setm_mmx_operand")]
5433 "TARGET_MMX || TARGET_MMX_WITH_SSE"
5434 {
5435 if (CONST_INT_P (operands[2]))
5436 ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
5437 INTVAL (operands[2]));
5438 else
5439 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
5440 DONE;
5441 })
5442
5443 ;; Avoid combining registers from different units in a single alternative,
5444 ;; see comment above inline_secondary_memory_needed function in i386.cc
5445 (define_insn_and_split "*vec_extractv2si_0"
5446 [(set (match_operand:SI 0 "nonimmediate_operand" "=x,m,y, m,r,r")
5447 (vec_select:SI
5448 (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m,x")
5449 (parallel [(const_int 0)])))]
5450 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5451 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5452 "#"
5453 "&& reload_completed"
5454 [(set (match_dup 0) (match_dup 1))]
5455 "operands[1] = gen_lowpart (SImode, operands[1]);"
5456 [(set_attr "isa" "*,*,*,*,*,sse2")
5457 (set_attr "mmx_isa" "*,*,native,native,*,*")
5458 (set (attr "preferred_for_speed")
5459 (cond [(eq_attr "alternative" "5")
5460 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
5461 ]
5462 (symbol_ref "true")))])
5463
5464 (define_insn "*vec_extractv2si_0_zext_sse4"
5465 [(set (match_operand:DI 0 "register_operand" "=r,x")
5466 (zero_extend:DI
5467 (vec_select:SI
5468 (match_operand:V2SI 1 "register_operand" "x,x")
5469 (parallel [(const_int 0)]))))]
5470 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE4_1"
5471 "#"
5472 [(set_attr "isa" "x64,*")
5473 (set (attr "preferred_for_speed")
5474 (cond [(eq_attr "alternative" "0")
5475 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
5476 ]
5477 (symbol_ref "true")))])
5478
5479 (define_insn "*vec_extractv2si_0_zext"
5480 [(set (match_operand:DI 0 "register_operand" "=r")
5481 (zero_extend:DI
5482 (vec_select:SI
5483 (match_operand:V2SI 1 "register_operand" "x")
5484 (parallel [(const_int 0)]))))]
5485 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5486 && TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
5487 "#")
5488
5489 (define_split
5490 [(set (match_operand:DI 0 "register_operand")
5491 (zero_extend:DI
5492 (vec_select:SI
5493 (match_operand:V2SI 1 "register_operand")
5494 (parallel [(const_int 0)]))))]
5495 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5496 && TARGET_SSE2 && reload_completed"
5497 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
5498 "operands[1] = gen_lowpart (SImode, operands[1]);")
5499
5500 ;; Avoid combining registers from different units in a single alternative,
5501 ;; see comment above inline_secondary_memory_needed function in i386.cc
5502 (define_insn "*vec_extractv2si_1"
5503 [(set (match_operand:SI 0 "nonimmediate_operand" "=y,jrjm,rm,x,x,y,x,r")
5504 (vec_select:SI
5505 (match_operand:V2SI 1 "nonimmediate_operand" " 0,x, x ,x,0,o,o,o")
5506 (parallel [(const_int 1)])))]
5507 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5508 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5509 "@
5510 punpckhdq\t%0, %0
5511 pextrd\t{$1, %1, %0|%0, %1, 1}
5512 vpextrd\t{$1, %1, %0|%0, %1, 1}
5513 %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
5514 shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}
5515 #
5516 #
5517 #"
5518 [(set_attr "isa" "*,sse4_noavx,avx,sse2,noavx,*,*,*")
5519 (set (attr "addr")
5520 (if_then_else (eq_attr "alternative" "1")
5521 (const_string "gpr16")
5522 (const_string "*")))
5523 (set_attr "mmx_isa" "native,*,*,*,*,native,*,*")
5524 (set_attr "type" "mmxcvt,ssemov,ssemov,sseshuf1,sseshuf1,mmxmov,ssemov,imov")
5525 (set (attr "length_immediate")
5526 (if_then_else (eq_attr "alternative" "1,2,3,4")
5527 (const_string "1")
5528 (const_string "*")))
5529 (set_attr "prefix" "orig,orig,maybe_evex,maybe_vex,orig,orig,orig,orig")
5530 (set_attr "mode" "DI,TI,TI,TI,V4SF,SI,SI,SI")])
5531
5532 (define_split
5533 [(set (match_operand:SI 0 "register_operand")
5534 (vec_select:SI
5535 (match_operand:V2SI 1 "memory_operand")
5536 (parallel [(const_int 1)])))]
5537 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
5538 [(set (match_dup 0) (match_dup 1))]
5539 "operands[1] = adjust_address (operands[1], SImode, 4);")
5540
5541 (define_insn "*vec_extractv2si_1_zext"
5542 [(set (match_operand:DI 0 "register_operand" "=jr,r")
5543 (zero_extend:DI
5544 (vec_select:SI
5545 (match_operand:V2SI 1 "register_operand" "x,x")
5546 (parallel [(const_int 1)]))))]
5547 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
5548 && TARGET_64BIT && TARGET_SSE4_1"
5549 "%vpextrd\t{$1, %1, %k0|%k0, %1, 1}"
5550 [(set_attr "isa" "noavx,avx")
5551 (set_attr "type" "sselog1")
5552 (set_attr "prefix_extra" "1")
5553 (set_attr "length_immediate" "1")
5554 (set_attr "prefix" "maybe_vex")
5555 (set_attr "mode" "TI")])
5556
5557 (define_insn_and_split "*vec_extractv2si_zext_mem"
5558 [(set (match_operand:DI 0 "register_operand" "=y,x,r")
5559 (zero_extend:DI
5560 (vec_select:SI
5561 (match_operand:V2SI 1 "memory_operand" "o,o,o")
5562 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
5563 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_64BIT"
5564 "#"
5565 "&& reload_completed"
5566 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
5567 {
5568 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
5569 }
5570 [(set_attr "isa" "*,sse2,*")
5571 (set_attr "mmx_isa" "native,*,*")])
5572
5573 (define_expand "vec_extractv2sisi"
5574 [(match_operand:SI 0 "register_operand")
5575 (match_operand:V2SI 1 "register_operand")
5576 (match_operand 2 "const_int_operand")]
5577 "TARGET_MMX || TARGET_MMX_WITH_SSE"
5578 {
5579 ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
5580 operands[1], INTVAL (operands[2]));
5581 DONE;
5582 })
5583
5584 (define_expand "vec_initv2sisi"
5585 [(match_operand:V2SI 0 "register_operand")
5586 (match_operand 1)]
5587 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5588 {
5589 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
5590 operands[1]);
5591 DONE;
5592 })
5593
5594 (define_expand "vec_set<mode>"
5595 [(match_operand:V4FI_64 0 "register_operand")
5596 (match_operand:<mmxscalarmode> 1 "register_operand")
5597 (match_operand 2 "vec_setm_mmx_operand")]
5598 "TARGET_MMX || TARGET_MMX_WITH_SSE"
5599 {
5600 if (CONST_INT_P (operands[2]))
5601 ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
5602 INTVAL (operands[2]));
5603 else
5604 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
5605 DONE;
5606 })
5607
5608 (define_expand "vec_extract<mode><mmxscalarmodelower>"
5609 [(match_operand:<mmxscalarmode> 0 "register_operand")
5610 (match_operand:V4FI_64 1 "register_operand")
5611 (match_operand 2 "const_int_operand")]
5612 "TARGET_MMX || TARGET_MMX_WITH_SSE"
5613 {
5614 ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
5615 operands[1], INTVAL (operands[2]));
5616 DONE;
5617 })
5618
5619 (define_expand "vec_initv4hihi"
5620 [(match_operand:V4HI 0 "register_operand")
5621 (match_operand 1)]
5622 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5623 {
5624 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
5625 operands[1]);
5626 DONE;
5627 })
5628
5629 (define_expand "vec_init<mode><mmxscalarmodelower>"
5630 [(match_operand:V4F_64 0 "register_operand")
5631 (match_operand 1)]
5632 "TARGET_MMX_WITH_SSE"
5633 {
5634 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
5635 operands[1]);
5636 DONE;
5637 })
5638
5639 (define_expand "vec_setv8qi"
5640 [(match_operand:V8QI 0 "register_operand")
5641 (match_operand:QI 1 "register_operand")
5642 (match_operand 2 "vec_setm_mmx_operand")]
5643 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
5644 {
5645 if (CONST_INT_P (operands[2]))
5646 ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
5647 INTVAL (operands[2]));
5648 else
5649 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
5650 DONE;
5651 })
5652
5653 (define_expand "vec_extractv8qiqi"
5654 [(match_operand:QI 0 "register_operand")
5655 (match_operand:V8QI 1 "register_operand")
5656 (match_operand 2 "const_int_operand")]
5657 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
5658 {
5659 ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
5660 operands[1], INTVAL (operands[2]));
5661 DONE;
5662 })
5663
5664 (define_expand "vec_initv8qiqi"
5665 [(match_operand:V8QI 0 "register_operand")
5666 (match_operand 1)]
5667 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5668 {
5669 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
5670 operands[1]);
5671 DONE;
5672 })
5673
5674 (define_insn "*pinsrw"
5675 [(set (match_operand:V2FI_32 0 "register_operand" "=x,YW,&x")
5676 (vec_merge:V2FI_32
5677 (vec_duplicate:V2FI_32
5678 (match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,x"))
5679 (match_operand:V2FI_32 1 "register_operand" "0,YW,x")
5680 (match_operand:SI 3 "const_int_operand")))]
5681 "TARGET_SSE2
5682 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5683 < GET_MODE_NUNITS (V2HImode))"
5684 {
5685 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5686 switch (which_alternative)
5687 {
5688 case 2:
5689 return "#";
5690 case 1:
5691 if (MEM_P (operands[2]))
5692 return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5693 else
5694 return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
5695 case 0:
5696 if (MEM_P (operands[2]))
5697 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
5698 else
5699 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
5700 default:
5701 gcc_unreachable ();
5702 }
5703 }
5704 [(set_attr "isa" "noavx,avx,sse4")
5705 (set_attr "type" "sselog")
5706 (set_attr "length_immediate" "1")
5707 (set_attr "mode" "TI")])
5708
5709 ;; For TARGET_SSE4_1, implement insert from XMM reg with PSHUFLW + PBLENDW.
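;; The mask inversion mirrors the V4HI split above: with two lanes,
;; inserting into lane 0 (vec_merge mask 1) gives a PBLENDW immediate of
;; ~1 & 0x3 = 2, so only lane 1 is taken from the original vector.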
5710 (define_split
5711 [(set (match_operand:V2FI_32 0 "sse_reg_operand")
5712 (vec_merge:V2FI_32
5713 (vec_duplicate:V2FI_32
5714 (match_operand:<mmxscalarmode> 2 "sse_reg_operand"))
5715 (match_operand:V2FI_32 1 "sse_reg_operand")
5716 (match_operand:SI 3 "const_int_operand")))]
5717 "TARGET_SSE4_1 && reload_completed
5718 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5719 < GET_MODE_NUNITS (<MODE>mode))"
5720 [(set (match_dup 0)
5721 (vec_duplicate:V2FI_32 (match_dup 2)))
5722 (set (match_dup 0)
5723 (vec_merge:V2FI_32 (match_dup 1) (match_dup 0) (match_dup 3)))]
5724 "operands[3] = GEN_INT (~INTVAL (operands[3]) & 0x3);")
5725
5726
5727 (define_insn "*pinsrb"
5728 [(set (match_operand:V4QI 0 "register_operand" "=x,YW")
5729 (vec_merge:V4QI
5730 (vec_duplicate:V4QI
5731 (match_operand:QI 2 "nonimmediate_operand" "jrjm,rm"))
5732 (match_operand:V4QI 1 "register_operand" "0,YW")
5733 (match_operand:SI 3 "const_int_operand")))]
5734 "TARGET_SSE4_1
5735 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5736 < GET_MODE_NUNITS (V4QImode))"
5737 {
5738 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5739 switch (which_alternative)
5740 {
5741 case 1:
5742 if (MEM_P (operands[2]))
5743 return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5744 else
5745 return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
5746 case 0:
5747 if (MEM_P (operands[2]))
5748 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
5749 else
5750 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
5751 default:
5752 gcc_unreachable ();
5753 }
5754 }
5755 [(set_attr "isa" "noavx,avx")
5756 (set_attr "addr" "gpr16,*")
5757 (set_attr "type" "sselog")
5758 (set_attr "prefix_extra" "1")
5759 (set_attr "length_immediate" "1")
5760 (set_attr "prefix" "orig,vex")
5761 (set_attr "mode" "TI")])
5762
5763 (define_insn "*pextrw"
5764 [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,jm,m")
5765 (vec_select:HI
5766 (match_operand:V2HI 1 "register_operand" "YW,YW,YW")
5767 (parallel [(match_operand:SI 2 "const_0_to_1_operand")])))]
5768 "TARGET_SSE2"
5769 "@
5770 %vpextrw\t{%2, %1, %k0|%k0, %1, %2}
5771 pextrw\t{%2, %1, %0|%0, %1, %2}
5772 vpextrw\t{%2, %1, %0|%0, %1, %2}"
5773 [(set_attr "isa" "*,sse4_noavx,avx")
5774 (set_attr "addr" "*,gpr16,*")
5775 (set_attr "type" "sselog1")
5776 (set_attr "length_immediate" "1")
5777 (set_attr "prefix" "maybe_vex")
5778 (set_attr "mode" "TI")])
5779
5780 (define_insn "*pextrw<mode>"
5781 [(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw")
5782 (vec_select:<mmxscalarmode>
5783 (match_operand:V2F_32 1 "register_operand" "YW,YW,YW,0,YW")
5784 (parallel [(match_operand:SI 2 "const_0_to_1_operand")])))]
5785 "TARGET_SSE2"
5786 {
5787 switch (which_alternative)
5788 {
5789 case 0:
5790 return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
5791 case 1:
5792 return "pextrw\t{%2, %1, %0|%0, %1, %2}";
5793 case 2:
5794 return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
5795 case 3:
5796 operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
5797 return "psrldq\t{%2, %0|%0, %2}";
5798 case 4:
5799 operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
5800 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5801
5802 default:
5803 gcc_unreachable ();
5804 }
5805 }
5806 [(set_attr "isa" "*,sse4_noavx,avx,noavx,avx")
5807 (set_attr "addr" "*,gpr16,*,*,*")
5808 (set_attr "type" "sselog1,sselog1,sselog1,sseishft1,sseishft1")
5809 (set_attr "length_immediate" "1")
5810 (set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex")
5811 (set_attr "mode" "TI")])
5812
5813 (define_insn "*pextrw_zext"
5814 [(set (match_operand:SWI48 0 "register_operand" "=r")
5815 (zero_extend:SWI48
5816 (vec_select:HI
5817 (match_operand:V2HI 1 "register_operand" "YW")
5818 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
5819 "TARGET_SSE2"
5820 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
5821 [(set_attr "type" "sselog1")
5822 (set_attr "length_immediate" "1")
5823 (set_attr "prefix" "maybe_vex")
5824 (set_attr "mode" "TI")])
5825
5826 (define_insn "*pextrb"
5827 [(set (match_operand:QI 0 "nonimmediate_operand" "=jr,jm,r,m")
5828 (vec_select:QI
5829 (match_operand:V4QI 1 "register_operand" "YW,YW,YW,YW")
5830 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
5831 "TARGET_SSE4_1"
5832 "@
5833 pextrb\t{%2, %1, %k0|%k0, %1, %2}
5834 pextrb\t{%2, %1, %0|%0, %1, %2}
5835 vpextrb\t{%2, %1, %k0|%k0, %1, %2}
5836 vpextrb\t{%2, %1, %0|%0, %1, %2}"
5837 [(set_attr "isa" "noavx,noavx,avx,avx")
5838 (set_attr "addr" "*,gpr16,*,*")
5839 (set_attr "type" "sselog1")
5840 (set_attr "prefix_extra" "1")
5841 (set_attr "length_immediate" "1")
5842 (set_attr "prefix" "maybe_vex")
5843 (set_attr "mode" "TI")])
5844
5845 (define_insn "*pextrb_zext"
5846 [(set (match_operand:SWI248 0 "register_operand" "=jr,r")
5847 (zero_extend:SWI248
5848 (vec_select:QI
5849 (match_operand:V4QI 1 "register_operand" "YW,YW")
5850 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
5851 "TARGET_SSE4_1"
5852 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
5853 [(set_attr "isa" "noavx,avx")
5854 (set_attr "type" "sselog1")
5855 (set_attr "prefix_extra" "1")
5856 (set_attr "length_immediate" "1")
5857 (set_attr "prefix" "maybe_vex")
5858 (set_attr "mode" "TI")])
5859
5860 (define_expand "vec_set<mode>"
5861 [(match_operand:V2FI_32 0 "register_operand")
5862 (match_operand:<mmxscalarmode> 1 "register_operand")
5863 (match_operand 2 "vec_setm_sse41_operand")]
5864 "TARGET_SSE2"
5865 {
5866 if (CONST_INT_P (operands[2]))
5867 ix86_expand_vector_set (false, operands[0], operands[1],
5868 INTVAL (operands[2]));
5869 else
5870 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
5871 DONE;
5872 })
5873
5874 (define_expand "vec_extract<mode><mmxscalarmodelower>"
5875 [(match_operand:<mmxscalarmode> 0 "register_operand")
5876 (match_operand:V2FI_32 1 "register_operand")
5877 (match_operand 2 "const_int_operand")]
5878 "TARGET_SSE2"
5879 {
5880 ix86_expand_vector_extract (false, operands[0],
5881 operands[1], INTVAL (operands[2]));
5882 DONE;
5883 })
5884
5885 (define_expand "vec_setv4qi"
5886 [(match_operand:V4QI 0 "register_operand")
5887 (match_operand:QI 1 "register_operand")
5888 (match_operand 2 "vec_setm_mmx_operand")]
5889 "TARGET_SSE4_1"
5890 {
5891 if (CONST_INT_P (operands[2]))
5892 ix86_expand_vector_set (false, operands[0], operands[1],
5893 INTVAL (operands[2]));
5894 else
5895 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
5896 DONE;
5897 })
5898
5899 (define_expand "vec_extractv4qiqi"
5900 [(match_operand:QI 0 "register_operand")
5901 (match_operand:V4QI 1 "register_operand")
5902 (match_operand 2 "const_int_operand")]
5903 "TARGET_SSE4_1"
5904 {
5905 ix86_expand_vector_extract (false, operands[0],
5906 operands[1], INTVAL (operands[2]));
5907 DONE;
5908 })
5909
5910 (define_insn_and_split "*punpckwd"
5911 [(set (match_operand:V2FI_32 0 "register_operand" "=x,Yw")
5912 (vec_select:V2FI_32
5913 (vec_concat:<mmxdoublevecmode>
5914 (match_operand:V2FI_32 1 "register_operand" "0,Yw")
5915 (match_operand:V2FI_32 2 "register_operand" "x,Yw"))
5916 (parallel [(match_operand 3 "const_0_to_3_operand")
5917 (match_operand 4 "const_0_to_3_operand")])))]
5918 "TARGET_SSE2"
5919 "#"
5920 "&& reload_completed"
5921 [(set (match_dup 5)
5922 (vec_select:<mmxxmmmode>
5923 (match_dup 5)
5924 (parallel [(match_dup 3) (match_dup 4)
5925 (const_int 2) (const_int 3)
5926 (const_int 4) (const_int 5)
5927 (const_int 6) (const_int 7)])))]
5928 {
5929 rtx dest = lowpart_subreg (<mmxxmmmode>mode, operands[0], <MODE>mode);
5930 rtx op1 = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
5931 rtx op2 = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
5932
5933 emit_insn (gen_vec_interleave_low<mmxxmmmodelower> (dest, op1, op2));
5934
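/* After the interleave the destination holds
   { op1[0], op2[0], op1[1], op2[1], ... } while the original
   (vec_concat op1 op2) is { op1[0], op1[1], op2[0], op2[1] },
   so selector i is remapped as 0->0, 1->2, 2->1, 3->3 below.  */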
5935 static const int map[4] = { 0, 2, 1, 3 };
5936
5937 int sel0 = map[INTVAL (operands[3])];
5938 int sel1 = map[INTVAL (operands[4])];
5939
5940 if (sel0 == 0 && sel1 == 1)
5941 DONE;
5942
5943 operands[3] = GEN_INT (sel0);
5944 operands[4] = GEN_INT (sel1);
5945 operands[5] = dest;
5946 }
5947 [(set_attr "isa" "noavx,avx")
5948 (set_attr "type" "sselog")
5949 (set_attr "mode" "TI")])
5950
5951 (define_insn "*pshufw<mode>_1"
5952 [(set (match_operand:V2FI_32 0 "register_operand" "=Yw")
5953 (vec_select:V2FI_32
5954 (match_operand:V2FI_32 1 "register_operand" "Yw")
5955 (parallel [(match_operand 2 "const_0_to_1_operand")
5956 (match_operand 3 "const_0_to_1_operand")])))]
5957 "TARGET_SSE2"
5958 {
5959 int mask = 0;
5960 mask |= INTVAL (operands[2]) << 0;
5961 mask |= INTVAL (operands[3]) << 2;
5962 mask |= 2 << 4;
5963 mask |= 3 << 6;
5964 operands[2] = GEN_INT (mask);
5965
5966 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
5967 }
5968 [(set_attr "type" "sselog1")
5969 (set_attr "length_immediate" "1")
5970 (set_attr "mode" "TI")])
5971
5972 (define_insn "*vec_dupv2hi"
5973 [(set (match_operand:V2HI 0 "register_operand" "=Yw")
5974 (vec_duplicate:V2HI
5975 (truncate:HI
5976 (match_operand:SI 1 "register_operand" "Yw"))))]
5977 "TARGET_SSE2"
5978 "%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
5979 [(set_attr "type" "sselog1")
5980 (set_attr "length_immediate" "1")
5981 (set_attr "mode" "TI")])
5982
5983 (define_insn "*vec_dup<mode>"
5984 [(set (match_operand:V2FI_32 0 "register_operand" "=Yw")
5985 (vec_duplicate:V2FI_32
5986 (match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))]
5987 "TARGET_SSE2"
5988 "%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
5989 [(set_attr "type" "sselog1")
5990 (set_attr "length_immediate" "1")
5991 (set_attr "mode" "TI")])
5992
5993 (define_expand "vec_init<mode><mmxscalarmodelower>"
5994 [(match_operand:V2FI_32 0 "register_operand")
5995 (match_operand 1)]
5996 "TARGET_SSE2"
5997 {
5998 ix86_expand_vector_init (false, operands[0],
5999 operands[1]);
6000 DONE;
6001 })
6002
6003 (define_expand "vec_initv4qiqi"
6004 [(match_operand:V4QI 0 "register_operand")
6005 (match_operand 1)]
6006 "TARGET_SSE2"
6007 {
6008 ix86_expand_vector_init (false, operands[0],
6009 operands[1]);
6010 DONE;
6011 })
6012
6013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6014 ;;
6015 ;; Miscellaneous
6016 ;;
6017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6018
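;; PAVGB/PAVGW compute the unsigned average rounded up, i.e.
;; (a + b + 1) >> 1 evaluated in a wider mode, which is what the
;; templates below spell out explicitly; e.g. pavgb of 1 and 2 gives 2.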
6019 (define_expand "mmx_uavg<mode>3"
6020 [(set (match_operand:MMXMODE12 0 "register_operand")
6021 (truncate:MMXMODE12
6022 (lshiftrt:<mmxdoublemode>
6023 (plus:<mmxdoublemode>
6024 (plus:<mmxdoublemode>
6025 (zero_extend:<mmxdoublemode>
6026 (match_operand:MMXMODE12 1 "register_mmxmem_operand"))
6027 (zero_extend:<mmxdoublemode>
6028 (match_operand:MMXMODE12 2 "register_mmxmem_operand")))
6029 (match_dup 3))
6030 (const_int 1))))]
6031 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
6032 && (TARGET_SSE || TARGET_3DNOW)"
6033 {
6034 operands[3] = CONST1_RTX(<mmxdoublemode>mode);
6035 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
6036 })
6037
6038 (define_insn "*mmx_uavgv8qi3"
6039 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
6040 (truncate:V8QI
6041 (lshiftrt:V8HI
6042 (plus:V8HI
6043 (plus:V8HI
6044 (zero_extend:V8HI
6045 (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw"))
6046 (zero_extend:V8HI
6047 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))
6048 (const_vector:V8HI [(const_int 1) (const_int 1)
6049 (const_int 1) (const_int 1)
6050 (const_int 1) (const_int 1)
6051 (const_int 1) (const_int 1)]))
6052 (const_int 1))))]
6053 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
6054 && (TARGET_SSE || TARGET_3DNOW)
6055 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6056 {
6057 switch (which_alternative)
6058 {
6059 case 2:
6060 return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
6061 case 1:
6062 case 0:
6063 /* These two instructions have the same operation, but their encoding
6064 is different. Prefer the one that is de facto standard. */
6065 if (TARGET_SSE || TARGET_3DNOW_A)
6066 return "pavgb\t{%2, %0|%0, %2}";
6067 else
6068 return "pavgusb\t{%2, %0|%0, %2}";
6069 default:
6070 gcc_unreachable ();
6071 }
6072 }
6073 [(set_attr "isa" "*,sse2_noavx,avx")
6074 (set_attr "mmx_isa" "native,*,*")
6075 (set_attr "type" "mmxshft,sseiadd,sseiadd")
6076 (set (attr "prefix_extra")
6077 (if_then_else
6078 (not (ior (match_test "TARGET_SSE")
6079 (match_test "TARGET_3DNOW_A")))
6080 (const_string "1")
6081 (const_string "*")))
6082 (set_attr "mode" "DI,TI,TI")])
6083
6084 (define_insn "*mmx_uavgv4hi3"
6085 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
6086 (truncate:V4HI
6087 (lshiftrt:V4SI
6088 (plus:V4SI
6089 (plus:V4SI
6090 (zero_extend:V4SI
6091 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
6092 (zero_extend:V4SI
6093 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
6094 (const_vector:V4SI [(const_int 1) (const_int 1)
6095 (const_int 1) (const_int 1)]))
6096 (const_int 1))))]
6097 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
6098 && (TARGET_SSE || TARGET_3DNOW_A)
6099 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6100 "@
6101 pavgw\t{%2, %0|%0, %2}
6102 pavgw\t{%2, %0|%0, %2}
6103 vpavgw\t{%2, %1, %0|%0, %1, %2}"
6104 [(set_attr "isa" "*,sse2_noavx,avx")
6105 (set_attr "mmx_isa" "native,*,*")
6106 (set_attr "type" "mmxshft,sseiadd,sseiadd")
6107 (set_attr "mode" "DI,TI,TI")])
6108
6109 (define_expand "uavg<mode>3_ceil"
6110 [(set (match_operand:MMXMODE12 0 "register_operand")
6111 (truncate:MMXMODE12
6112 (lshiftrt:<mmxdoublemode>
6113 (plus:<mmxdoublemode>
6114 (plus:<mmxdoublemode>
6115 (zero_extend:<mmxdoublemode>
6116 (match_operand:MMXMODE12 1 "register_operand"))
6117 (zero_extend:<mmxdoublemode>
6118 (match_operand:MMXMODE12 2 "register_operand")))
6119 (match_dup 3))
6120 (const_int 1))))]
6121 "TARGET_MMX_WITH_SSE"
6122 "operands[3] = CONST1_RTX(<mmxdoublemode>mode);")
6123
6124 (define_insn "uavgv4qi3_ceil"
6125 [(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
6126 (truncate:V4QI
6127 (lshiftrt:V4HI
6128 (plus:V4HI
6129 (plus:V4HI
6130 (zero_extend:V4HI
6131 (match_operand:V4QI 1 "register_operand" "%0,Yw"))
6132 (zero_extend:V4HI
6133 (match_operand:V4QI 2 "register_operand" "x,Yw")))
6134 (const_vector:V4HI [(const_int 1) (const_int 1)
6135 (const_int 1) (const_int 1)]))
6136 (const_int 1))))]
6137 "TARGET_SSE2"
6138 "@
6139 pavgb\t{%2, %0|%0, %2}
6140 vpavgb\t{%2, %1, %0|%0, %1, %2}"
6141 [(set_attr "isa" "noavx,avx")
6142 (set_attr "type" "sseiadd")
6143 (set_attr "mode" "TI")])
6144
6145 (define_insn "uavgv2qi3_ceil"
6146 [(set (match_operand:V2QI 0 "register_operand" "=x,Yw")
6147 (truncate:V2QI
6148 (lshiftrt:V2HI
6149 (plus:V2HI
6150 (plus:V2HI
6151 (zero_extend:V2HI
6152 (match_operand:V2QI 1 "register_operand" "%0,Yw"))
6153 (zero_extend:V2HI
6154 (match_operand:V2QI 2 "register_operand" "x,Yw")))
6155 (const_vector:V2HI [(const_int 1) (const_int 1)]))
6156 (const_int 1))))]
6157 "TARGET_SSE2"
6158 "@
6159 pavgb\t{%2, %0|%0, %2}
6160 vpavgb\t{%2, %1, %0|%0, %1, %2}"
6161 [(set_attr "isa" "noavx,avx")
6162 (set_attr "type" "sseiadd")
6163 (set_attr "mode" "TI")])
6164
6165 (define_insn "uavgv2hi3_ceil"
6166 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
6167 (truncate:V2HI
6168 (lshiftrt:V2SI
6169 (plus:V2SI
6170 (plus:V2SI
6171 (zero_extend:V2SI
6172 (match_operand:V2HI 1 "register_operand" "%0,Yw"))
6173 (zero_extend:V2SI
6174 (match_operand:V2HI 2 "register_operand" "x,Yw")))
6175 (const_vector:V2SI [(const_int 1) (const_int 1)]))
6176 (const_int 1))))]
6177 "TARGET_SSE2"
6178 "@
6179 pavgw\t{%2, %0|%0, %2}
6180 vpavgw\t{%2, %1, %0|%0, %1, %2}"
6181 [(set_attr "isa" "noavx,avx")
6182 (set_attr "type" "sseiadd")
6183 (set_attr "mode" "TI")])
6184
6185 (define_expand "mmx_psadbw"
6186 [(set (match_operand:V1DI 0 "register_operand")
6187 (unspec:V1DI [(match_operand:V8QI 1 "register_mmxmem_operand")
6188 (match_operand:V8QI 2 "register_mmxmem_operand")]
6189 UNSPEC_PSADBW))]
6190 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)"
6191 "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
6192
6193 (define_insn "*mmx_psadbw"
6194 [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yw")
6195 (unspec:V1DI [(match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")
6196 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")]
6197 UNSPEC_PSADBW))]
6198 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)
6199 && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
6200 "@
6201 psadbw\t{%2, %0|%0, %2}
6202 psadbw\t{%2, %0|%0, %2}
6203 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
6204 [(set_attr "isa" "*,sse2_noavx,avx")
6205 (set_attr "mmx_isa" "native,*,*")
6206 (set_attr "type" "mmxshft,sseiadd,sseiadd")
6207 (set_attr "mode" "DI,TI,TI")])
6208
6209 (define_expand "reduc_<code>_scal_<mode>"
6210 [(any_logic:MMXMODE12
6211 (match_operand:<mmxscalarmode> 0 "register_operand")
6212 (match_operand:MMXMODE12 1 "register_operand"))]
6213 "TARGET_MMX_WITH_SSE"
6214 {
6215 rtx tmp = gen_reg_rtx (<MODE>mode);
6216 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
6217 emit_insn (gen_vec_extract<mode><mmxscalarmodelower> (operands[0],
6218 tmp, const0_rtx));
6219 DONE;
6220 })
6221
6222 (define_expand "reduc_<code>_scal_v4qi"
6223 [(any_logic:V4QI
6224 (match_operand:QI 0 "register_operand")
6225 (match_operand:V4QI 1 "register_operand"))]
6226 "TARGET_SSE2"
6227 {
6228 rtx tmp = gen_reg_rtx (V4QImode);
6229 ix86_expand_reduc (gen_<code>v4qi3, tmp, operands[1]);
6230 emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx));
6231 DONE;
6232 })
6233
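;; PSADBW against an all-zero operand sums the eight byte elements into
;; the low word of the result, so extracting byte 0 of that sum is the
;; (modulo 256) plus reduction; e.g. eight bytes of value 1 sum to 8.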
6234 (define_expand "reduc_plus_scal_v8qi"
6235 [(plus:V8QI
6236 (match_operand:QI 0 "register_operand")
6237 (match_operand:V8QI 1 "register_operand"))]
6238 "TARGET_MMX_WITH_SSE"
6239 {
6240 rtx tmp = gen_reg_rtx (V8QImode);
6241 emit_move_insn (tmp, CONST0_RTX (V8QImode));
6242 rtx tmp2 = gen_reg_rtx (V1DImode);
6243 emit_insn (gen_mmx_psadbw (tmp2, operands[1], tmp));
6244 tmp2 = gen_lowpart (V8QImode, tmp2);
6245 emit_insn (gen_vec_extractv8qiqi (operands[0], tmp2, const0_rtx));
6246 DONE;
6247 })
6248
6249 (define_expand "reduc_plus_scal_v4hi"
6250 [(plus:V4HI
6251 (match_operand:HI 0 "register_operand")
6252 (match_operand:V4HI 1 "register_operand"))]
6253 "TARGET_MMX_WITH_SSE"
6254 {
6255 rtx tmp = gen_reg_rtx (V4HImode);
6256 ix86_expand_reduc (gen_addv4hi3, tmp, operands[1]);
6257 emit_insn (gen_vec_extractv4hihi (operands[0], tmp, const0_rtx));
6258 DONE;
6259 })
6260
6261 (define_expand "reduc_<code>_scal_v4hi"
6262 [(smaxmin:V4HI
6263 (match_operand:HI 0 "register_operand")
6264 (match_operand:V4HI 1 "register_operand"))]
6265 "TARGET_MMX_WITH_SSE"
6266 {
6267 rtx tmp = gen_reg_rtx (V4HImode);
6268 ix86_expand_reduc (gen_<code>v4hi3, tmp, operands[1]);
6269 emit_insn (gen_vec_extractv4hihi (operands[0], tmp, const0_rtx));
6270 DONE;
6271 })
6272
6273 (define_expand "reduc_<code>_scal_v4qi"
6274 [(smaxmin:V4QI
6275 (match_operand:QI 0 "register_operand")
6276 (match_operand:V4QI 1 "register_operand"))]
6277 "TARGET_SSE4_1"
6278 {
6279 rtx tmp = gen_reg_rtx (V4QImode);
6280 ix86_expand_reduc (gen_<code>v4qi3, tmp, operands[1]);
6281 emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx));
6282 DONE;
6283 })
6284
6285 (define_expand "reduc_<code>_scal_v4hi"
6286 [(umaxmin:V4HI
6287 (match_operand:HI 0 "register_operand")
6288 (match_operand:V4HI 1 "register_operand"))]
6289 "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
6290 {
6291 rtx tmp = gen_reg_rtx (V4HImode);
6292 ix86_expand_reduc (gen_<code>v4hi3, tmp, operands[1]);
6293 emit_insn (gen_vec_extractv4hihi (operands[0], tmp, const0_rtx));
6294 DONE;
6295 })
6296
6297 (define_expand "reduc_<code>_scal_v4qi"
6298 [(umaxmin:V4QI
6299 (match_operand:QI 0 "register_operand")
6300 (match_operand:V4QI 1 "register_operand"))]
6301 "TARGET_SSE4_1"
6302 {
6303 rtx tmp = gen_reg_rtx (V4QImode);
6304 ix86_expand_reduc (gen_<code>v4qi3, tmp, operands[1]);
6305 emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx));
6306 DONE;
6307 })
6308
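;; The V4QI variant of the PSADBW trick goes through a full XMM
;; register: the four bytes are placed in element 0 of a zeroed V4SI
;; vector and summed against zero as above.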
6309 (define_expand "reduc_plus_scal_v4qi"
6310 [(plus:V4QI
6311 (match_operand:QI 0 "register_operand")
6312 (match_operand:V4QI 1 "register_operand"))]
6313 "TARGET_SSE2"
6314 {
6315 rtx op1 = gen_reg_rtx (V16QImode);
6316 emit_insn (gen_vec_setv4si_0 (lowpart_subreg (V4SImode, op1, V16QImode),
6317 CONST0_RTX (V4SImode),
6318 lowpart_subreg (SImode,
6319 operands[1],
6320 V4QImode)));
6321 rtx tmp = gen_reg_rtx (V16QImode);
6322 emit_move_insn (tmp, CONST0_RTX (V16QImode));
6323 rtx tmp2 = gen_reg_rtx (V2DImode);
6324 emit_insn (gen_sse2_psadbw (tmp2, op1, tmp));
6325 tmp2 = gen_lowpart (V16QImode, tmp2);
6326 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp2, const0_rtx));
6327 DONE;
6328 })
6329
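;; usadv8qi: the sum of absolute differences of the two V8QI inputs is
;; computed with PSADBW, reinterpreted as V2SI and accumulated into
;; operand 3 with a vector add.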
6330 (define_expand "usadv8qi"
6331 [(match_operand:V2SI 0 "register_operand")
6332 (match_operand:V8QI 1 "register_operand")
6333 (match_operand:V8QI 2 "register_operand")
6334 (match_operand:V2SI 3 "register_operand")]
6335 "TARGET_MMX_WITH_SSE"
6336 {
6337 rtx t1 = gen_reg_rtx (V1DImode);
6338 rtx t2 = gen_reg_rtx (V2SImode);
6339 emit_insn (gen_mmx_psadbw (t1, operands[1], operands[2]));
6340 convert_move (t2, t1, 0);
6341 emit_insn (gen_addv2si3 (operands[0], t2, operands[3]));
6342 DONE;
6343 })
6344
6345 (define_insn_and_split "mmx_pmovmskb"
6346 [(set (match_operand:SI 0 "register_operand" "=r,r,jr")
6347 (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x,x")]
6348 UNSPEC_MOVMSK))]
6349 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
6350 && (TARGET_SSE || TARGET_3DNOW_A)"
6351 "@
6352 pmovmskb\t{%1, %0|%0, %1}
6353 #
6354 #"
6355 "TARGET_SSE2 && reload_completed
6356 && SSE_REGNO_P (REGNO (operands[1]))"
6357 [(set (match_dup 0)
6358 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
6359 (set (match_dup 0)
6360 (zero_extend:SI (match_dup 2)))]
6361 {
6362 /* Generate SSE pmovmskb and zero-extend from QImode to SImode. */
6363 operands[1] = lowpart_subreg (V16QImode, operands[1],
6364 GET_MODE (operands[1]));
6365 operands[2] = lowpart_subreg (QImode, operands[0],
6366 GET_MODE (operands[0]));
6367 }
6368 [(set_attr "mmx_isa" "native,sse_noavx,avx")
6369 (set_attr "type" "mmxcvt,ssemov,ssemov")
6370 (set_attr "mode" "DI,TI,TI")])
6371
6372 (define_expand "mmx_maskmovq"
6373 [(set (match_operand:V8QI 0 "memory_operand")
6374 (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
6375 (match_operand:V8QI 2 "register_operand")
6376 (match_dup 0)]
6377 UNSPEC_MASKMOV))]
6378 "TARGET_SSE || TARGET_3DNOW_A")
6379
6380 (define_insn "*mmx_maskmovq"
6381 [(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
6382 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
6383 (match_operand:V8QI 2 "register_operand" "y")
6384 (mem:V8QI (match_dup 0))]
6385 UNSPEC_MASKMOV))]
6386 "TARGET_SSE || TARGET_3DNOW_A"
6387 ;; @@@ check ordering of operands in intel/nonintel syntax
6388 "maskmovq\t{%2, %1|%1, %2}"
6389 [(set_attr "type" "mmxcvt")
6390 (set_attr "znver1_decode" "vector")
6391 (set_attr "mode" "DI")])
6392
6393 (define_int_iterator EMMS
6394 [(UNSPECV_EMMS "TARGET_MMX")
6395 (UNSPECV_FEMMS "TARGET_3DNOW")])
6396
6397 (define_int_attr emms
6398 [(UNSPECV_EMMS "emms")
6399 (UNSPECV_FEMMS "femms")])
6400
6401 (define_expand "mmx_<emms>"
6402 [(parallel
6403 [(unspec_volatile [(const_int 0)] EMMS)
6404 (clobber (reg:XF ST0_REG))
6405 (clobber (reg:XF ST1_REG))
6406 (clobber (reg:XF ST2_REG))
6407 (clobber (reg:XF ST3_REG))
6408 (clobber (reg:XF ST4_REG))
6409 (clobber (reg:XF ST5_REG))
6410 (clobber (reg:XF ST6_REG))
6411 (clobber (reg:XF ST7_REG))
6412 (clobber (reg:DI MM0_REG))
6413 (clobber (reg:DI MM1_REG))
6414 (clobber (reg:DI MM2_REG))
6415 (clobber (reg:DI MM3_REG))
6416 (clobber (reg:DI MM4_REG))
6417 (clobber (reg:DI MM5_REG))
6418 (clobber (reg:DI MM6_REG))
6419 (clobber (reg:DI MM7_REG))])]
6420 "TARGET_MMX || TARGET_MMX_WITH_SSE"
6421 {
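/* With TARGET_MMX_WITH_SSE the MMX/x87 register state is never used,
   so no EMMS is needed; expand the builtin to a nop instead.  */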
6422 if (!TARGET_MMX)
6423 {
6424 emit_insn (gen_nop ());
6425 DONE;
6426 }
6427 })
6428
6429 (define_insn "*mmx_<emms>"
6430 [(unspec_volatile [(const_int 0)] EMMS)
6431 (clobber (reg:XF ST0_REG))
6432 (clobber (reg:XF ST1_REG))
6433 (clobber (reg:XF ST2_REG))
6434 (clobber (reg:XF ST3_REG))
6435 (clobber (reg:XF ST4_REG))
6436 (clobber (reg:XF ST5_REG))
6437 (clobber (reg:XF ST6_REG))
6438 (clobber (reg:XF ST7_REG))
6439 (clobber (reg:DI MM0_REG))
6440 (clobber (reg:DI MM1_REG))
6441 (clobber (reg:DI MM2_REG))
6442 (clobber (reg:DI MM3_REG))
6443 (clobber (reg:DI MM4_REG))
6444 (clobber (reg:DI MM5_REG))
6445 (clobber (reg:DI MM6_REG))
6446 (clobber (reg:DI MM7_REG))]
6447 ""
6448 "<emms>"
6449 [(set_attr "type" "mmx")
6450 (set_attr "modrm" "0")
6451 (set_attr "memory" "none")])