1 ;; GCC machine description for MMX and 3dNOW! instructions
2 ;; Copyright (C) 2005-2023 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 ;; The MMX and 3dNOW! patterns are in the same file because they use
21 ;; the same register file, and 3dNOW! adds a number of extensions to
22 ;; the base integer MMX isa.
23
24 ;; Note! Except for the basic move instructions, *all* of these
25 ;; patterns are outside the normal optabs namespace. This is because
26 ;; use of these registers requires the insertion of emms or femms
27 ;; instructions to return to normal fpu mode. The compiler doesn't
  28 ;; know how to do that itself, which means it's up to the user.  Thus
  29 ;; we should never use any of these patterns except at the
30 ;; direction of the user via a builtin.
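;;
;; As an illustration only (not a pattern in this file), user code that
;; reaches these patterns through the mmintrin.h intrinsic wrappers
;; looks roughly like this; the function name is arbitrary:
;;
;;   #include <mmintrin.h>
;;
;;   __m64
;;   add_pi16 (__m64 a, __m64 b)
;;   {
;;     __m64 r = _mm_add_pi16 (a, b); /* paddw via an MMX builtin.  */
;;     _mm_empty ();                  /* emms: leave MMX mode.  */
;;     return r;
;;   }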
31
32 (define_c_enum "unspec" [
33 UNSPEC_MOVNTQ
34 UNSPEC_PFRCP
35 UNSPEC_PFRCPIT1
36 UNSPEC_PFRCPIT2
37 UNSPEC_PFRSQRT
38 UNSPEC_PFRSQIT1
39 ])
40
41 (define_c_enum "unspecv" [
42 UNSPECV_EMMS
43 UNSPECV_FEMMS
44 ])
45
  46 ;; 8-byte integral modes handled by MMX (and by extension, SSE)
47 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
48 (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
49
50 ;; All 8-byte vector modes handled by MMX
51 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF V4BF])
52 (define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
53
54 ;; Mix-n-match
55 (define_mode_iterator MMXMODE12 [V8QI V4HI])
56 (define_mode_iterator MMXMODE14 [V8QI V2SI])
57 (define_mode_iterator MMXMODE24 [V4HI V2SI])
58 (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
59
60 ;; All 4-byte integer/float16 vector modes
61 (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
62
63 ;; 4-byte integer vector modes
64 (define_mode_iterator VI_32 [V4QI V2HI])
65
66 ;; 4-byte and 2-byte integer vector modes
67 (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
68
69 ;; 4-byte and 2-byte QImode vector modes
70 (define_mode_iterator VI1_16_32 [V4QI V2QI])
71
72 ;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
73 (define_mode_iterator V_16_32_64
74 [V2QI V4QI V2HI V2HF
75 (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
76 (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
77 (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
78
79 ;; V2S* modes
80 (define_mode_iterator V2FI [V2SF V2SI])
81
82 (define_mode_iterator V2FI_V4HF [V2SF V2SI V4HF])
83 ;; Mapping from integer vector mode to mnemonic suffix
84 (define_mode_attr mmxvecsize
85 [(V8QI "b") (V4QI "b") (V2QI "b")
86 (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
87
88 ;; Mapping to same size integral mode.
89 (define_mode_attr mmxinsnmode
90 [(V8QI "DI") (V4QI "SI") (V2QI "HI")
91 (V4HI "DI") (V2HI "SI")
92 (V2SI "DI")
93 (V4HF "DI") (V2HF "SI")
94 (V4BF "DI") (V2BF "SI")
95 (V2SF "DI")])
96
97 (define_mode_attr mmxdoublemode
98 [(V8QI "V8HI") (V4HI "V4SI")])
99
100 ;; Mapping of vector float modes to an integer mode of the same size
101 (define_mode_attr mmxintvecmode
102 [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")])
103
104 (define_mode_attr mmxintvecmodelower
105 [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")])
106
107 ;; Mapping of vector modes to a vector mode of double size
108 (define_mode_attr mmxdoublevecmode
109 [(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF")])
110
111 ;; Mapping of vector modes back to the scalar modes
112 (define_mode_attr mmxscalarmode
113 [(V2SI "SI") (V2SF "SF")])
114
115 (define_mode_attr Yv_Yw
116 [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
117
118 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
119 ;;
120 ;; Move patterns
121 ;;
122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
123
124 ;; All of these patterns are enabled for MMX as well as 3dNOW.
125 ;; This is essential for maintaining stable calling conventions.
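;; (Values of these modes can be passed and returned in MMX or SSE
;; registers, so the move patterns must exist even when the user never
;; touches the corresponding builtins.)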
126
127 (define_expand "mov<mode>"
128 [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
129 (match_operand:MMXMODE 1 "nonimmediate_operand"))]
130 "TARGET_MMX || TARGET_MMX_WITH_SSE"
131 {
132 ix86_expand_vector_move (<MODE>mode, operands);
133 DONE;
134 })
135
136 (define_insn "*mov<mode>_internal"
137 [(set (match_operand:MMXMODE 0 "nonimmediate_operand"
138 "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
139 (match_operand:MMXMODE 1 "nonimm_or_0_operand"
140 "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))]
141 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
142 && !(MEM_P (operands[0]) && MEM_P (operands[1]))
143 && ix86_hardreg_mov_ok (operands[0], operands[1])"
144 {
145 switch (get_attr_type (insn))
146 {
147 case TYPE_MULTI:
148 return "#";
149
150 case TYPE_IMOV:
151 if (get_attr_mode (insn) == MODE_SI)
152 return "mov{l}\t{%1, %k0|%k0, %1}";
153 else
154 return "mov{q}\t{%1, %0|%0, %1}";
155
156 case TYPE_MMX:
157 return "pxor\t%0, %0";
158
159 case TYPE_MMXMOV:
160 /* Handle broken assemblers that require movd instead of movq. */
161 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
162 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
163 return "movd\t{%1, %0|%0, %1}";
164 return "movq\t{%1, %0|%0, %1}";
165
166 case TYPE_SSECVT:
167 if (SSE_REG_P (operands[0]))
168 return "movq2dq\t{%1, %0|%0, %1}";
169 else
170 return "movdq2q\t{%1, %0|%0, %1}";
171
172 case TYPE_SSELOG1:
173 return standard_sse_constant_opcode (insn, operands);
174
175 case TYPE_SSEMOV:
176 return ix86_output_ssemov (insn, operands);
177
178 default:
179 gcc_unreachable ();
180 }
181 }
182 [(set (attr "isa")
183 (cond [(eq_attr "alternative" "0,1")
184 (const_string "nox64")
185 (eq_attr "alternative" "2,3,4,9,10")
186 (const_string "x64")
187 (eq_attr "alternative" "15,16")
188 (const_string "x64_sse2")
189 (eq_attr "alternative" "17,18")
190 (const_string "sse2")
191 ]
192 (const_string "*")))
193 (set (attr "type")
194 (cond [(eq_attr "alternative" "0,1")
195 (const_string "multi")
196 (eq_attr "alternative" "2,3,4")
197 (const_string "imov")
198 (eq_attr "alternative" "5")
199 (const_string "mmx")
200 (eq_attr "alternative" "6,7,8,9,10")
201 (const_string "mmxmov")
202 (eq_attr "alternative" "11")
203 (const_string "sselog1")
204 (eq_attr "alternative" "17,18")
205 (const_string "ssecvt")
206 ]
207 (const_string "ssemov")))
208 (set (attr "prefix_rex")
209 (if_then_else (eq_attr "alternative" "9,10,15,16")
210 (const_string "1")
211 (const_string "*")))
212 (set (attr "prefix")
213 (if_then_else (eq_attr "type" "sselog1,ssemov")
214 (const_string "maybe_vex")
215 (const_string "orig")))
216 (set (attr "prefix_data16")
217 (if_then_else
218 (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
219 (const_string "1")
220 (const_string "*")))
221 (set (attr "mode")
222 (cond [(eq_attr "alternative" "2")
223 (const_string "SI")
224 (eq_attr "alternative" "11,12")
225 (cond [(match_test "<MODE>mode == V2SFmode
226 || <MODE>mode == V4HFmode
227 || <MODE>mode == V4BFmode")
228 (const_string "V4SF")
229 (ior (not (match_test "TARGET_SSE2"))
230 (match_test "optimize_function_for_size_p (cfun)"))
231 (const_string "V4SF")
232 ]
233 (const_string "TI"))
234
235 (and (eq_attr "alternative" "13")
236 (ior (ior (and (match_test "<MODE>mode == V2SFmode")
237 (not (match_test "TARGET_MMX_WITH_SSE")))
238 (not (match_test "TARGET_SSE2")))
239 (match_test "<MODE>mode == V4HFmode
240 || <MODE>mode == V4BFmode")))
241 (const_string "V2SF")
242
243 (and (eq_attr "alternative" "14")
244 (ior (ior (match_test "<MODE>mode == V2SFmode")
245 (not (match_test "TARGET_SSE2")))
246 (match_test "<MODE>mode == V4HFmode
247 || <MODE>mode == V4BFmode")))
248 (const_string "V2SF")
249 ]
250 (const_string "DI")))
251 (set (attr "preferred_for_speed")
252 (cond [(eq_attr "alternative" "9,15")
253 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
254 (eq_attr "alternative" "10,16")
255 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
256 ]
257 (symbol_ref "true")))])
258
259 (define_split
260 [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
261 (match_operand:MMXMODE 1 "nonimmediate_gr_operand"))]
262 "!TARGET_64BIT && reload_completed"
263 [(const_int 0)]
264 "ix86_split_long_move (operands); DONE;")
265
266 (define_split
267 [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
268 (match_operand:MMXMODE 1 "const0_operand"))]
269 "!TARGET_64BIT && reload_completed"
270 [(const_int 0)]
271 "ix86_split_long_move (operands); DONE;")
272
273 (define_expand "movmisalign<mode>"
274 [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
275 (match_operand:MMXMODE 1 "nonimmediate_operand"))]
276 "TARGET_MMX || TARGET_MMX_WITH_SSE"
277 {
278 ix86_expand_vector_move (<MODE>mode, operands);
279 DONE;
280 })
281
282 (define_expand "mov<mode>"
283 [(set (match_operand:V_32 0 "nonimmediate_operand")
284 (match_operand:V_32 1 "nonimmediate_operand"))]
285 ""
286 {
287 ix86_expand_vector_move (<MODE>mode, operands);
288 DONE;
289 })
290
291 (define_insn "*mov<mode>_internal"
292 [(set (match_operand:V_32 0 "nonimmediate_operand"
293 "=r ,m ,v,v,v,m,r,v")
294 (match_operand:V_32 1 "general_operand"
295 "rmC,rC,C,v,m,v,v,r"))]
296 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
297 && ix86_hardreg_mov_ok (operands[0], operands[1])"
298 {
299 switch (get_attr_type (insn))
300 {
301 case TYPE_IMOV:
302 return "mov{l}\t{%1, %0|%0, %1}";
303
304 case TYPE_SSELOG1:
305 return standard_sse_constant_opcode (insn, operands);
306
307 case TYPE_SSEMOV:
308 return ix86_output_ssemov (insn, operands);
309
310 default:
311 gcc_unreachable ();
312 }
313 }
314 [(set (attr "isa")
315 (cond [(eq_attr "alternative" "6,7")
316 (const_string "sse2")
317 ]
318 (const_string "*")))
319 (set (attr "type")
320 (cond [(eq_attr "alternative" "2")
321 (const_string "sselog1")
322 (eq_attr "alternative" "3,4,5,6,7")
323 (const_string "ssemov")
324 ]
325 (const_string "imov")))
326 (set (attr "prefix")
327 (if_then_else (eq_attr "type" "sselog1,ssemov")
328 (const_string "maybe_vex")
329 (const_string "orig")))
330 (set (attr "prefix_data16")
331 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
332 (const_string "1")
333 (const_string "*")))
334 (set (attr "mode")
335 (cond [(eq_attr "alternative" "2,3")
336 (cond [(match_test "<MODE>mode == V2HFmode
337 || <MODE>mode == V2BFmode")
338 (const_string "V4SF")
339 (match_test "TARGET_AVX")
340 (const_string "TI")
341 (ior (not (match_test "TARGET_SSE2"))
342 (match_test "optimize_function_for_size_p (cfun)"))
343 (const_string "V4SF")
344 ]
345 (const_string "TI"))
346
347 (and (eq_attr "alternative" "4,5")
348 (ior (match_test "<MODE>mode == V2HFmode
349 || <MODE>mode == V2BFmode")
350 (not (match_test "TARGET_SSE2"))))
351 (const_string "SF")
352 ]
353 (const_string "SI")))
354 (set (attr "preferred_for_speed")
355 (cond [(eq_attr "alternative" "6")
356 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
357 (eq_attr "alternative" "7")
358 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
359 ]
360 (symbol_ref "true")))])
361
362 ;; 16-bit, 32-bit and 64-bit constant vector stores. After reload,
363 ;; convert them to immediate integer stores.
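;; For example, a V4QI store of { 1, 2, 3, 4 } becomes a mov{l} store of
;; the packed little-endian immediate 0x04030201.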
364 (define_insn_and_split "*mov<mode>_imm"
365 [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
366 (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
367 ""
368 "#"
369 "&& reload_completed"
370 [(set (match_dup 0) (match_dup 1))]
371 {
372 HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
373 <MODE>mode);
374 operands[1] = GEN_INT (val);
375 operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
376 })
377
378 ;; For TARGET_64BIT we always round up to 8 bytes.
379 (define_insn "*push<mode>2_rex64"
380 [(set (match_operand:V_32 0 "push_operand" "=X,X")
381 (match_operand:V_32 1 "nonmemory_no_elim_operand" "rC,*v"))]
382 "TARGET_64BIT"
383 "@
384 push{q}\t%q1
385 #"
386 [(set_attr "type" "push,multi")
387 (set_attr "mode" "DI")])
388
389 (define_split
390 [(set (match_operand:V_32 0 "push_operand")
391 (match_operand:V_32 1 "sse_reg_operand"))]
392 "TARGET_64BIT && TARGET_SSE && reload_completed"
393 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
394 (set (match_dup 0) (match_dup 1))]
395 {
396 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<V_32:MODE>mode)));
397 /* Preserve memory attributes. */
398 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
399 })
400
401 (define_expand "movmisalign<mode>"
402 [(set (match_operand:V_32 0 "nonimmediate_operand")
403 (match_operand:V_32 1 "nonimmediate_operand"))]
404 ""
405 {
406 ix86_expand_vector_move (<MODE>mode, operands);
407 DONE;
408 })
409
410 (define_expand "movv2qi"
411 [(set (match_operand:V2QI 0 "nonimmediate_operand")
412 (match_operand:V2QI 1 "nonimmediate_operand"))]
413 ""
414 {
415 ix86_expand_vector_move (V2QImode, operands);
416 DONE;
417 })
418
419 (define_insn "*movv2qi_internal"
420 [(set (match_operand:V2QI 0 "nonimmediate_operand"
421 "=r,r,r,m ,v,v,v,m,r,v")
422 (match_operand:V2QI 1 "general_operand"
423 "r ,C,m,rC,C,v,m,v,v,r"))]
424 "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
425 {
426 switch (get_attr_type (insn))
427 {
428 case TYPE_IMOV:
429 if (get_attr_mode (insn) == MODE_SI)
430 return "mov{l}\t{%k1, %k0|%k0, %k1}";
431 else
432 return "mov{w}\t{%1, %0|%0, %1}";
433
434 case TYPE_IMOVX:
435 /* movzwl is faster than movw on p2 due to partial word stalls,
436 though not as fast as an aligned movl. */
437 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
438
439 case TYPE_SSELOG1:
440 if (satisfies_constraint_C (operands[1]))
441 return standard_sse_constant_opcode (insn, operands);
442
443 if (SSE_REG_P (operands[0]))
444 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
445 else
446 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
447
448 case TYPE_SSEMOV:
449 return ix86_output_ssemov (insn, operands);
450
451 default:
452 gcc_unreachable ();
453 }
454 }
455 [(set (attr "isa")
456 (cond [(eq_attr "alternative" "6,8,9")
457 (const_string "sse2")
458 (eq_attr "alternative" "7")
459 (const_string "sse4")
460 ]
461 (const_string "*")))
462 (set (attr "type")
463 (cond [(eq_attr "alternative" "6,7")
464 (if_then_else (match_test "TARGET_AVX512FP16")
465 (const_string "ssemov")
466 (const_string "sselog1"))
467 (eq_attr "alternative" "4")
468 (const_string "sselog1")
469 (eq_attr "alternative" "5,8,9")
470 (const_string "ssemov")
471 (match_test "optimize_function_for_size_p (cfun)")
472 (const_string "imov")
473 (and (eq_attr "alternative" "0")
474 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
475 (not (match_test "TARGET_HIMODE_MATH"))))
476 (const_string "imov")
477 (and (eq_attr "alternative" "1,2")
478 (match_operand:V2QI 1 "aligned_operand"))
479 (const_string "imov")
480 (and (match_test "TARGET_MOVX")
481 (eq_attr "alternative" "0,2"))
482 (const_string "imovx")
483 ]
484 (const_string "imov")))
485 (set (attr "prefix")
486 (cond [(eq_attr "alternative" "4,5,6,7,8,9")
487 (const_string "maybe_evex")
488 ]
489 (const_string "orig")))
490 (set (attr "mode")
491 (cond [(eq_attr "alternative" "6,7")
492 (if_then_else (match_test "TARGET_AVX512FP16")
493 (const_string "HI")
494 (const_string "TI"))
495 (eq_attr "alternative" "8,9")
496 (if_then_else (match_test "TARGET_AVX512FP16")
497 (const_string "HI")
498 (const_string "SI"))
499 (eq_attr "alternative" "4")
500 (cond [(match_test "TARGET_AVX")
501 (const_string "TI")
502 (ior (not (match_test "TARGET_SSE2"))
503 (match_test "optimize_function_for_size_p (cfun)"))
504 (const_string "V4SF")
505 ]
506 (const_string "TI"))
507 (eq_attr "alternative" "5")
508 (cond [(match_test "TARGET_AVX512FP16")
509 (const_string "HF")
510 (match_test "TARGET_AVX")
511 (const_string "TI")
512 (ior (not (match_test "TARGET_SSE2"))
513 (match_test "optimize_function_for_size_p (cfun)"))
514 (const_string "V4SF")
515 ]
516 (const_string "TI"))
517 (eq_attr "type" "imovx")
518 (const_string "SI")
519 (and (eq_attr "alternative" "1,2")
520 (match_operand:V2QI 1 "aligned_operand"))
521 (const_string "SI")
522 (and (eq_attr "alternative" "0")
523 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
524 (not (match_test "TARGET_HIMODE_MATH"))))
525 (const_string "SI")
526 ]
527 (const_string "HI")))
528 (set (attr "preferred_for_speed")
529 (cond [(eq_attr "alternative" "8")
530 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
531 (eq_attr "alternative" "9")
532 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
533 ]
534 (symbol_ref "true")))])
535
536 ;; We always round up to UNITS_PER_WORD bytes.
537 (define_insn "*pushv2qi2"
538 [(set (match_operand:V2QI 0 "push_operand" "=X,X")
539 (match_operand:V2QI 1 "nonmemory_no_elim_operand" "rC,v"))]
540 ""
541 "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";
542 #"
543 [(set_attr "isa" "*,sse4")
544 (set_attr "type" "push,multi")
545 (set (attr "mode")
546 (cond [(eq_attr "alternative" "0")
547 (if_then_else (match_test "TARGET_64BIT")
548 (const_string "DI")
549 (const_string "SI"))
550 (eq_attr "alternative" "1")
551 (if_then_else (match_test "TARGET_AVX512FP16")
552 (const_string "HI")
553 (const_string "TI"))
554 ]
555 (const_string "HI")))])
556
557 (define_split
558 [(set (match_operand:V2QI 0 "push_operand")
559 (match_operand:V2QI 1 "sse_reg_operand"))]
560 "TARGET_SSE4_1 && reload_completed"
561 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
562 (set (match_dup 0) (match_dup 1))]
563 {
564 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V2QImode)));
565 /* Preserve memory attributes. */
566 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
567 })
568
569 (define_expand "movmisalignv2qi"
570 [(set (match_operand:V2QI 0 "nonimmediate_operand")
571 (match_operand:V2QI 1 "nonimmediate_operand"))]
572 ""
573 {
574 ix86_expand_vector_move (V2QImode, operands);
575 DONE;
576 })
577
578 (define_insn "sse_movntq"
579 [(set (match_operand:DI 0 "memory_operand" "=m,m")
580 (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
581 UNSPEC_MOVNTQ))]
582 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
583 && (TARGET_SSE || TARGET_3DNOW_A)"
584 "@
585 movntq\t{%1, %0|%0, %1}
586 movnti\t{%1, %0|%0, %1}"
587 [(set_attr "isa" "*,x64")
588 (set_attr "mmx_isa" "native,*")
589 (set_attr "type" "mmxmov,ssemov")
590 (set_attr "mode" "DI")])
591
592 (define_expand "movq_<mode>_to_sse"
593 [(set (match_operand:<mmxdoublevecmode> 0 "register_operand")
594 (vec_concat:<mmxdoublevecmode>
595 (match_operand:V2FI_V4HF 1 "nonimmediate_operand")
596 (match_dup 2)))]
597 "TARGET_SSE2"
598 {
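  /* With FP traps disabled there is no need to sanitize the upper half
     of the SSE register: junk in the high elements cannot raise spurious
     exceptions, so a paradoxical lowpart subreg is sufficient.  */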
599 if (<MODE>mode == V2SFmode
600 && !flag_trapping_math)
601 {
602 rtx op1 = force_reg (<MODE>mode, operands[1]);
603 emit_move_insn (operands[0], lowpart_subreg (<mmxdoublevecmode>mode,
604 op1, <MODE>mode));
605 DONE;
606 }
607
608 operands[2] = CONST0_RTX (<MODE>mode);
609 })
610
611 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
612 ;;
613 ;; Parallel single-precision floating point arithmetic
614 ;;
615 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
616
617 (define_expand "<code>v2sf2"
618 [(set (match_operand:V2SF 0 "register_operand")
619 (absneg:V2SF
620 (match_operand:V2SF 1 "register_operand")))]
621 "TARGET_MMX_WITH_SSE"
622 "ix86_expand_fp_absneg_operator (<CODE>, V2SFmode, operands); DONE;")
623
624 (define_insn_and_split "*mmx_<code>v2sf2"
625 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x")
626 (absneg:V2SF
627 (match_operand:V2SF 1 "register_operand" "0,x,x")))
628 (use (match_operand:V2SF 2 "nonimmediate_operand" "x,0,x"))]
629 "TARGET_MMX_WITH_SSE"
630 "#"
631 "&& reload_completed"
632 [(set (match_dup 0)
633 (<absneg_op>:V2SF (match_dup 1) (match_dup 2)))]
634 {
635 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
636 std::swap (operands[1], operands[2]);
637 }
638 [(set_attr "isa" "noavx,noavx,avx")])
639
640 (define_insn_and_split "*mmx_nabsv2sf2"
641 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x")
642 (neg:V2SF
643 (abs:V2SF
644 (match_operand:V2SF 1 "register_operand" "0,x,x"))))
645 (use (match_operand:V2SF 2 "nonimmediate_operand" "x,0,x"))]
646 "TARGET_MMX_WITH_SSE"
647 "#"
648 "&& reload_completed"
649 [(set (match_dup 0)
650 (ior:V2SF (match_dup 1) (match_dup 2)))]
651 {
652 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
653 std::swap (operands[1], operands[2]);
654 }
655 [(set_attr "isa" "noavx,noavx,avx")])
656
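;; Binary V2SF operations without a native TARGET_MMX_WITH_SSE insn are
;; expanded by widening both operands to V4SF (movq_v2sf_to_sse),
;; performing the full V4SF operation, and taking the low half of the
;; result.  Most of the V2SF expanders below follow this scheme.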
657 (define_expand "<insn>v2sf3"
658 [(set (match_operand:V2SF 0 "register_operand")
659 (plusminusmult:V2SF
660 (match_operand:V2SF 1 "nonimmediate_operand")
661 (match_operand:V2SF 2 "nonimmediate_operand")))]
662 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
663 {
664 rtx op2 = gen_reg_rtx (V4SFmode);
665 rtx op1 = gen_reg_rtx (V4SFmode);
666 rtx op0 = gen_reg_rtx (V4SFmode);
667
668 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
669 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
670
671 emit_insn (gen_<insn>v4sf3 (op0, op1, op2));
672
673 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
674 DONE;
675 })
676
677 (define_expand "mmx_addv2sf3"
678 [(set (match_operand:V2SF 0 "register_operand")
679 (plus:V2SF
680 (match_operand:V2SF 1 "nonimmediate_operand")
681 (match_operand:V2SF 2 "nonimmediate_operand")))]
682 "TARGET_3DNOW"
683 "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
684
685 (define_insn "*mmx_addv2sf3"
686 [(set (match_operand:V2SF 0 "register_operand" "=y")
687 (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
688 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
689 "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
690 "pfadd\t{%2, %0|%0, %2}"
691 [(set_attr "type" "mmxadd")
692 (set_attr "prefix_extra" "1")
693 (set_attr "mode" "V2SF")])
694
695 (define_expand "mmx_subv2sf3"
696 [(set (match_operand:V2SF 0 "register_operand")
697 (minus:V2SF (match_operand:V2SF 1 "register_operand")
698 (match_operand:V2SF 2 "nonimmediate_operand")))]
699 "TARGET_3DNOW")
700
701 (define_expand "mmx_subrv2sf3"
702 [(set (match_operand:V2SF 0 "register_operand")
703 (minus:V2SF (match_operand:V2SF 2 "register_operand")
704 (match_operand:V2SF 1 "nonimmediate_operand")))]
705 "TARGET_3DNOW")
706
707 (define_insn "*mmx_subv2sf3"
708 [(set (match_operand:V2SF 0 "register_operand" "=y,y")
709 (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
710 (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
711 "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
712 "@
713 pfsub\t{%2, %0|%0, %2}
714 pfsubr\t{%1, %0|%0, %1}"
715 [(set_attr "type" "mmxadd")
716 (set_attr "prefix_extra" "1")
717 (set_attr "mode" "V2SF")])
718
719 (define_expand "mmx_mulv2sf3"
720 [(set (match_operand:V2SF 0 "register_operand")
721 (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
722 (match_operand:V2SF 2 "nonimmediate_operand")))]
723 "TARGET_3DNOW"
724 "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
725
726 (define_insn "*mmx_mulv2sf3"
727 [(set (match_operand:V2SF 0 "register_operand" "=y")
728 (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
729 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
730 "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
731 "pfmul\t{%2, %0|%0, %2}"
732 [(set_attr "type" "mmxmul")
733 (set_attr "prefix_extra" "1")
734 (set_attr "mode" "V2SF")])
735
736 (define_expand "divv2sf3"
737 [(set (match_operand:V2SF 0 "register_operand")
738 (div:V2SF (match_operand:V2SF 1 "register_operand")
739 (match_operand:V2SF 2 "register_operand")))]
740 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
741 {
742 rtx op2 = gen_reg_rtx (V4SFmode);
743 rtx op1 = gen_reg_rtx (V4SFmode);
744 rtx op0 = gen_reg_rtx (V4SFmode);
745
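  /* Widen the divisor with 1.0 rather than 0.0 in the upper half so
     that the unused lanes of the V4SF division cannot trap.  */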
746 rtx tmp = gen_rtx_VEC_CONCAT (V4SFmode, operands[2],
747 force_reg (V2SFmode, CONST1_RTX (V2SFmode)));
748 emit_insn (gen_rtx_SET (op2, tmp));
749 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
750
751 emit_insn (gen_divv4sf3 (op0, op1, op2));
752
753 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
754 DONE;
755 })
756
757 (define_expand "<code>v2sf3"
758 [(set (match_operand:V2SF 0 "register_operand")
759 (smaxmin:V2SF
760 (match_operand:V2SF 1 "register_operand")
761 (match_operand:V2SF 2 "register_operand")))]
762 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
763 {
764 rtx op2 = gen_reg_rtx (V4SFmode);
765 rtx op1 = gen_reg_rtx (V4SFmode);
766 rtx op0 = gen_reg_rtx (V4SFmode);
767
768 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
769 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
770
771 emit_insn (gen_<code>v4sf3 (op0, op1, op2));
772
773 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
774 DONE;
775 })
776
777 (define_expand "mmx_<code>v2sf3"
778 [(set (match_operand:V2SF 0 "register_operand")
779 (smaxmin:V2SF
780 (match_operand:V2SF 1 "nonimmediate_operand")
781 (match_operand:V2SF 2 "nonimmediate_operand")))]
782 "TARGET_3DNOW"
783 {
784 if (!flag_finite_math_only || flag_signed_zeros)
785 {
786 operands[1] = force_reg (V2SFmode, operands[1]);
787 emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
788 (operands[0], operands[1], operands[2]));
789 DONE;
790 }
791 else
792 ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
793 })
794
795 ;; These versions of the min/max patterns are intentionally ignorant of
796 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
797 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
798 ;; are undefined in this condition, we're certain this is correct.
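;; (For example, smax (0.0, -0.0) may return either zero, and the
;; result with a NaN operand is unspecified.)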
799
800 (define_insn "*mmx_<code>v2sf3"
801 [(set (match_operand:V2SF 0 "register_operand" "=y")
802 (smaxmin:V2SF
803 (match_operand:V2SF 1 "nonimmediate_operand" "%0")
804 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
805 "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
806 "pf<maxmin_float>\t{%2, %0|%0, %2}"
807 [(set_attr "type" "mmxadd")
808 (set_attr "prefix_extra" "1")
809 (set_attr "mode" "V2SF")])
810
811 ;; These versions of the min/max patterns implement exactly the operations
812 ;; min = (op1 < op2 ? op1 : op2)
813 ;; max = (!(op1 < op2) ? op1 : op2)
814 ;; Their operands are not commutative, and thus they may be used in the
815 ;; presence of -0.0 and NaN.
816
817 (define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
818 [(set (match_operand:V2SF 0 "register_operand" "=y")
819 (unspec:V2SF
820 [(match_operand:V2SF 1 "register_operand" "0")
821 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
822 IEEE_MAXMIN))]
823 "TARGET_3DNOW"
824 "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
825 [(set_attr "type" "mmxadd")
826 (set_attr "prefix_extra" "1")
827 (set_attr "mode" "V2SF")])
828
829 (define_insn "mmx_rcpv2sf2"
830 [(set (match_operand:V2SF 0 "register_operand" "=y")
831 (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
832 UNSPEC_PFRCP))]
833 "TARGET_3DNOW"
834 "pfrcp\t{%1, %0|%0, %1}"
835 [(set_attr "type" "mmx")
836 (set_attr "prefix_extra" "1")
837 (set_attr "mode" "V2SF")])
838
839 (define_insn "mmx_rcpit1v2sf3"
840 [(set (match_operand:V2SF 0 "register_operand" "=y")
841 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
842 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
843 UNSPEC_PFRCPIT1))]
844 "TARGET_3DNOW"
845 "pfrcpit1\t{%2, %0|%0, %2}"
846 [(set_attr "type" "mmx")
847 (set_attr "prefix_extra" "1")
848 (set_attr "mode" "V2SF")])
849
850 (define_insn "mmx_rcpit2v2sf3"
851 [(set (match_operand:V2SF 0 "register_operand" "=y")
852 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
853 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
854 UNSPEC_PFRCPIT2))]
855 "TARGET_3DNOW"
856 "pfrcpit2\t{%2, %0|%0, %2}"
857 [(set_attr "type" "mmx")
858 (set_attr "prefix_extra" "1")
859 (set_attr "mode" "V2SF")])
860
861 (define_expand "sqrtv2sf2"
862 [(set (match_operand:V2SF 0 "register_operand")
863 (sqrt:V2SF (match_operand:V2SF 1 "nonimmediate_operand")))]
864 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
865 {
866 rtx op1 = gen_reg_rtx (V4SFmode);
867 rtx op0 = gen_reg_rtx (V4SFmode);
868
869 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
870
871 emit_insn (gen_sqrtv4sf2 (op0, op1));
872
873 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
874 DONE;
875 })
876
877 (define_insn "mmx_rsqrtv2sf2"
878 [(set (match_operand:V2SF 0 "register_operand" "=y")
879 (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
880 UNSPEC_PFRSQRT))]
881 "TARGET_3DNOW"
882 "pfrsqrt\t{%1, %0|%0, %1}"
883 [(set_attr "type" "mmx")
884 (set_attr "prefix_extra" "1")
885 (set_attr "mode" "V2SF")])
886
887 (define_insn "mmx_rsqit1v2sf3"
888 [(set (match_operand:V2SF 0 "register_operand" "=y")
889 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
890 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
891 UNSPEC_PFRSQIT1))]
892 "TARGET_3DNOW"
893 "pfrsqit1\t{%2, %0|%0, %2}"
894 [(set_attr "type" "mmx")
895 (set_attr "prefix_extra" "1")
896 (set_attr "mode" "V2SF")])
897
898 (define_expand "mmx_haddv2sf3"
899 [(set (match_operand:V2SF 0 "register_operand")
900 (vec_concat:V2SF
901 (plus:SF
902 (vec_select:SF
903 (match_operand:V2SF 1 "register_operand")
904 (parallel [(const_int 0)]))
905 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
906 (plus:SF
907 (vec_select:SF
908 (match_operand:V2SF 2 "nonimmediate_operand")
909 (parallel [(const_int 0)]))
910 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
911 "TARGET_3DNOW")
912
913 (define_insn "*mmx_haddv2sf3"
914 [(set (match_operand:V2SF 0 "register_operand" "=y")
915 (vec_concat:V2SF
916 (plus:SF
917 (vec_select:SF
918 (match_operand:V2SF 1 "register_operand" "0")
919 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
920 (vec_select:SF (match_dup 1)
921 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
922 (plus:SF
923 (vec_select:SF
924 (match_operand:V2SF 2 "nonimmediate_operand" "ym")
925 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
926 (vec_select:SF (match_dup 2)
927 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
928 "TARGET_3DNOW
929 && INTVAL (operands[3]) != INTVAL (operands[4])
930 && INTVAL (operands[5]) != INTVAL (operands[6])"
931 "pfacc\t{%2, %0|%0, %2}"
932 [(set_attr "type" "mmxadd")
933 (set_attr "prefix_extra" "1")
934 (set_attr "mode" "V2SF")])
935
936 (define_insn_and_split "*mmx_haddv2sf3_low"
937 [(set (match_operand:SF 0 "register_operand")
938 (plus:SF
939 (vec_select:SF
940 (match_operand:V2SF 1 "nonimmediate_operand")
941 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
942 (vec_select:SF
943 (match_dup 1)
944 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
945 "TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math
946 && INTVAL (operands[2]) != INTVAL (operands[3])
947 && ix86_pre_reload_split ()"
948 "#"
949 "&& 1"
950 [(const_int 0)]
951 {
952 rtx op1 = gen_reg_rtx (V4SFmode);
953 rtx op0 = gen_reg_rtx (V4SFmode);
954
955 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
956
957 emit_insn (gen_sse3_haddv4sf3 (op0, op1, op1));
958
959 emit_move_insn (operands[0], lowpart_subreg (SFmode, op0, V4SFmode));
960 DONE;
961 })
962
963 (define_insn "mmx_hsubv2sf3"
964 [(set (match_operand:V2SF 0 "register_operand" "=y")
965 (vec_concat:V2SF
966 (minus:SF
967 (vec_select:SF
968 (match_operand:V2SF 1 "register_operand" "0")
969 (parallel [(const_int 0)]))
970 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
971 (minus:SF
972 (vec_select:SF
973 (match_operand:V2SF 2 "nonimmediate_operand" "ym")
974 (parallel [(const_int 0)]))
975 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
976 "TARGET_3DNOW_A"
977 "pfnacc\t{%2, %0|%0, %2}"
978 [(set_attr "type" "mmxadd")
979 (set_attr "prefix_extra" "1")
980 (set_attr "mode" "V2SF")])
981
982 (define_insn_and_split "*mmx_hsubv2sf3_low"
983 [(set (match_operand:SF 0 "register_operand")
984 (minus:SF
985 (vec_select:SF
986 (match_operand:V2SF 1 "register_operand")
987 (parallel [(const_int 0)]))
988 (vec_select:SF
989 (match_dup 1)
990 (parallel [(const_int 1)]))))]
991 "TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math
992 && ix86_pre_reload_split ()"
993 "#"
994 "&& 1"
995 [(const_int 0)]
996 {
997 rtx op1 = gen_reg_rtx (V4SFmode);
998 rtx op0 = gen_reg_rtx (V4SFmode);
999
1000 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1001
1002 emit_insn (gen_sse3_hsubv4sf3 (op0, op1, op1));
1003
1004 emit_move_insn (operands[0], lowpart_subreg (SFmode, op0, V4SFmode));
1005 DONE;
1006 })
1007
1008 (define_expand "mmx_haddsubv2sf3"
1009 [(set (match_operand:V2SF 0 "register_operand")
1010 (vec_concat:V2SF
1011 (minus:SF
1012 (vec_select:SF
1013 (match_operand:V2SF 1 "register_operand")
1014 (parallel [(const_int 0)]))
1015 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1016 (plus:SF
1017 (vec_select:SF
1018 (match_operand:V2SF 2 "nonimmediate_operand")
1019 (parallel [(const_int 0)]))
1020 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
1021 "TARGET_3DNOW_A")
1022
1023 (define_insn "*mmx_haddsubv2sf3"
1024 [(set (match_operand:V2SF 0 "register_operand" "=y")
1025 (vec_concat:V2SF
1026 (minus:SF
1027 (vec_select:SF
1028 (match_operand:V2SF 1 "register_operand" "0")
1029 (parallel [(const_int 0)]))
1030 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1031 (plus:SF
1032 (vec_select:SF
1033 (match_operand:V2SF 2 "nonimmediate_operand" "ym")
1034 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1035 (vec_select:SF
1036 (match_dup 2)
1037 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))))]
1038 "TARGET_3DNOW_A
1039 && INTVAL (operands[3]) != INTVAL (operands[4])"
1040 "pfpnacc\t{%2, %0|%0, %2}"
1041 [(set_attr "type" "mmxadd")
1042 (set_attr "prefix_extra" "1")
1043 (set_attr "mode" "V2SF")])
1044
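;; addsubps semantics: element 0 of the result is the difference and
;; element 1 the sum (the vec_merge mask of 1 selects the minus operand
;; for element 0).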
1045 (define_expand "vec_addsubv2sf3"
1046 [(set (match_operand:V2SF 0 "register_operand")
1047 (vec_merge:V2SF
1048 (minus:V2SF
1049 (match_operand:V2SF 1 "nonimmediate_operand")
1050 (match_operand:V2SF 2 "nonimmediate_operand"))
1051 (plus:V2SF (match_dup 1) (match_dup 2))
1052 (const_int 1)))]
1053 "TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1054 {
1055 rtx op2 = gen_reg_rtx (V4SFmode);
1056 rtx op1 = gen_reg_rtx (V4SFmode);
1057 rtx op0 = gen_reg_rtx (V4SFmode);
1058
1059 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1060 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1061
1062 emit_insn (gen_vec_addsubv4sf3 (op0, op1, op2));
1063
1064 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1065 DONE;
1066 })
1067
1068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1069 ;;
1070 ;; Parallel single-precision floating point comparisons
1071 ;;
1072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1073
1074 (define_expand "mmx_eqv2sf3"
1075 [(set (match_operand:V2SI 0 "register_operand")
1076 (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand")
1077 (match_operand:V2SF 2 "nonimmediate_operand")))]
1078 "TARGET_3DNOW"
1079 "ix86_fixup_binary_operands_no_copy (EQ, V2SFmode, operands);")
1080
1081 (define_insn "*mmx_eqv2sf3"
1082 [(set (match_operand:V2SI 0 "register_operand" "=y")
1083 (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
1084 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
1085 "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
1086 "pfcmpeq\t{%2, %0|%0, %2}"
1087 [(set_attr "type" "mmxcmp")
1088 (set_attr "prefix_extra" "1")
1089 (set_attr "mode" "V2SF")])
1090
1091 (define_insn "mmx_gtv2sf3"
1092 [(set (match_operand:V2SI 0 "register_operand" "=y")
1093 (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
1094 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
1095 "TARGET_3DNOW"
1096 "pfcmpgt\t{%2, %0|%0, %2}"
1097 [(set_attr "type" "mmxcmp")
1098 (set_attr "prefix_extra" "1")
1099 (set_attr "mode" "V2SF")])
1100
1101 (define_insn "mmx_gev2sf3"
1102 [(set (match_operand:V2SI 0 "register_operand" "=y")
1103 (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
1104 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
1105 "TARGET_3DNOW"
1106 "pfcmpge\t{%2, %0|%0, %2}"
1107 [(set_attr "type" "mmxcmp")
1108 (set_attr "prefix_extra" "1")
1109 (set_attr "mode" "V2SF")])
1110
1111 (define_expand "vec_cmpv2sfv2si"
1112 [(set (match_operand:V2SI 0 "register_operand")
1113 (match_operator:V2SI 1 ""
1114 [(match_operand:V2SF 2 "nonimmediate_operand")
1115 (match_operand:V2SF 3 "nonimmediate_operand")]))]
1116 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1117 {
1118 rtx ops[4];
1119 ops[3] = gen_reg_rtx (V4SFmode);
1120 ops[2] = gen_reg_rtx (V4SFmode);
1121 ops[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), V4SImode, ops[2], ops[3]);
1122 ops[0] = gen_reg_rtx (V4SImode);
1123
1124 emit_insn (gen_movq_v2sf_to_sse (ops[3], operands[3]));
1125 emit_insn (gen_movq_v2sf_to_sse (ops[2], operands[2]));
1126
1127 bool ok = ix86_expand_fp_vec_cmp (ops);
1128 gcc_assert (ok);
1129
1130 emit_move_insn (operands[0], lowpart_subreg (V2SImode, ops[0], V4SImode));
1131 DONE;
1132 })
1133
1134 (define_expand "vcond<mode>v2sf"
1135 [(set (match_operand:V2FI 0 "register_operand")
1136 (if_then_else:V2FI
1137 (match_operator 3 ""
1138 [(match_operand:V2SF 4 "nonimmediate_operand")
1139 (match_operand:V2SF 5 "nonimmediate_operand")])
1140 (match_operand:V2FI 1 "general_operand")
1141 (match_operand:V2FI 2 "general_operand")))]
1142 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1143 {
1144 rtx ops[6];
1145 ops[5] = gen_reg_rtx (V4SFmode);
1146 ops[4] = gen_reg_rtx (V4SFmode);
1147 ops[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), VOIDmode, ops[4], ops[5]);
1148 ops[2] = lowpart_subreg (<mmxdoublevecmode>mode,
1149 force_reg (<MODE>mode, operands[2]),
1150 <MODE>mode);
1151 ops[1] = lowpart_subreg (<mmxdoublevecmode>mode,
1152 force_reg (<MODE>mode, operands[1]),
1153 <MODE>mode);
1154 ops[0] = gen_reg_rtx (<mmxdoublevecmode>mode);
1155
1156 emit_insn (gen_movq_v2sf_to_sse (ops[5], operands[5]));
1157 emit_insn (gen_movq_v2sf_to_sse (ops[4], operands[4]));
1158
1159 bool ok = ix86_expand_fp_vcond (ops);
1160 gcc_assert (ok);
1161
1162 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
1163 <mmxdoublevecmode>mode));
1164 DONE;
1165 })
1166
1167 (define_insn "@sse4_1_insertps_<mode>"
1168 [(set (match_operand:V2FI 0 "register_operand" "=Yr,*x,v")
1169 (unspec:V2FI
1170 [(match_operand:V2FI 2 "nonimmediate_operand" "Yrm,*xm,vm")
1171 (match_operand:V2FI 1 "register_operand" "0,0,v")
1172 (match_operand:SI 3 "const_0_to_255_operand")]
1173 UNSPEC_INSERTPS))]
1174 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
1175 {
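  /* insertps with a memory source takes the 32-bit element directly
     from memory, so fold the COUNT_S field of the immediate into the
     address and clear it in the immediate.  */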
1176 if (MEM_P (operands[2]))
1177 {
1178 unsigned count_s = INTVAL (operands[3]) >> 6;
1179 if (count_s)
1180 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
1181 operands[2] = adjust_address_nv (operands[2],
1182 <mmxscalarmode>mode, count_s * 4);
1183 }
1184 switch (which_alternative)
1185 {
1186 case 0:
1187 case 1:
1188 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1189 case 2:
1190 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
1191 default:
1192 gcc_unreachable ();
1193 }
1194 }
1195 [(set_attr "isa" "noavx,noavx,avx")
1196 (set_attr "type" "sselog")
1197 (set_attr "prefix_data16" "1,1,*")
1198 (set_attr "prefix_extra" "1")
1199 (set_attr "length_immediate" "1")
1200 (set_attr "prefix" "orig,orig,maybe_evex")
1201 (set_attr "mode" "V4SF")])
1202
1203 (define_insn "*mmx_blendps"
1204 [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
1205 (vec_merge:V2SF
1206 (match_operand:V2SF 2 "register_operand" "Yr,*x,x")
1207 (match_operand:V2SF 1 "register_operand" "0,0,x")
1208 (match_operand:SI 3 "const_0_to_3_operand")))]
1209 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
1210 "@
1211 blendps\t{%3, %2, %0|%0, %2, %3}
1212 blendps\t{%3, %2, %0|%0, %2, %3}
1213 vblendps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1214 [(set_attr "isa" "noavx,noavx,avx")
1215 (set_attr "type" "ssemov")
1216 (set_attr "length_immediate" "1")
1217 (set_attr "prefix_data16" "1,1,*")
1218 (set_attr "prefix_extra" "1")
1219 (set_attr "prefix" "orig,orig,vex")
1220 (set_attr "mode" "V4SF")])
1221
1222 (define_insn "mmx_blendvps"
1223 [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
1224 (unspec:V2SF
1225 [(match_operand:V2SF 1 "register_operand" "0,0,x")
1226 (match_operand:V2SF 2 "register_operand" "Yr,*x,x")
1227 (match_operand:V2SF 3 "register_operand" "Yz,Yz,x")]
1228 UNSPEC_BLENDV))]
1229 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
1230 "@
1231 blendvps\t{%3, %2, %0|%0, %2, %3}
1232 blendvps\t{%3, %2, %0|%0, %2, %3}
1233 vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1234 [(set_attr "isa" "noavx,noavx,avx")
1235 (set_attr "type" "ssemov")
1236 (set_attr "length_immediate" "1")
1237 (set_attr "prefix_data16" "1,1,*")
1238 (set_attr "prefix_extra" "1")
1239 (set_attr "prefix" "orig,orig,vex")
1240 (set_attr "btver2_decode" "vector")
1241 (set_attr "mode" "V4SF")])
1242
1243 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1244 ;;
1245 ;; Parallel single-precision floating point logical operations
1246 ;;
1247 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1248
1249 (define_insn "*mmx_andnotv2sf3"
1250 [(set (match_operand:V2SF 0 "register_operand" "=x,x")
1251 (and:V2SF
1252 (not:V2SF
1253 (match_operand:V2SF 1 "register_operand" "0,x"))
1254 (match_operand:V2SF 2 "register_operand" "x,x")))]
1255 "TARGET_MMX_WITH_SSE"
1256 "@
1257 andnps\t{%2, %0|%0, %2}
1258 vandnps\t{%2, %1, %0|%0, %1, %2}"
1259 [(set_attr "isa" "noavx,avx")
1260 (set_attr "type" "sselog")
1261 (set_attr "prefix" "orig,vex")
1262 (set_attr "mode" "V4SF")])
1263
1264 (define_insn "<code>v2sf3"
1265 [(set (match_operand:V2SF 0 "register_operand" "=x,x")
1266 (any_logic:V2SF
1267 (match_operand:V2SF 1 "register_operand" "%0,x")
1268 (match_operand:V2SF 2 "register_operand" "x,x")))]
1269 "TARGET_MMX_WITH_SSE"
1270 "@
1271 <logic>ps\t{%2, %0|%0, %2}
1272 v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
1273 [(set_attr "isa" "noavx,avx")
1274 (set_attr "type" "sselog")
1275 (set_attr "prefix" "orig,vex")
1276 (set_attr "mode" "V4SF")])
1277
1278 (define_expand "copysignv2sf3"
1279 [(set (match_dup 4)
1280 (and:V2SF
1281 (not:V2SF (match_dup 3))
1282 (match_operand:V2SF 1 "register_operand")))
1283 (set (match_dup 5)
1284 (and:V2SF (match_dup 3)
1285 (match_operand:V2SF 2 "register_operand")))
1286 (set (match_operand:V2SF 0 "register_operand")
1287 (ior:V2SF (match_dup 4) (match_dup 5)))]
1288 "TARGET_MMX_WITH_SSE"
1289 {
1290 operands[3] = ix86_build_signbit_mask (V2SFmode, true, false);
1291
1292 operands[4] = gen_reg_rtx (V2SFmode);
1293 operands[5] = gen_reg_rtx (V2SFmode);
1294 })
1295
1296 (define_expand "xorsignv2sf3"
1297 [(set (match_dup 4)
1298 (and:V2SF (match_dup 3)
1299 (match_operand:V2SF 2 "register_operand")))
1300 (set (match_operand:V2SF 0 "register_operand")
1301 (xor:V2SF (match_dup 4)
1302 (match_operand:V2SF 1 "register_operand")))]
1303 "TARGET_MMX_WITH_SSE"
1304 {
1305 operands[3] = ix86_build_signbit_mask (V2SFmode, true, false);
1306
1307 operands[4] = gen_reg_rtx (V2SFmode);
1308 })
1309
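;; The sign of each SF element is its most significant bit, so shifting
;; each V2SI lane right logically by 31 leaves just the sign bit.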
1310 (define_expand "signbitv2sf2"
1311 [(set (match_operand:V2SI 0 "register_operand")
1312 (lshiftrt:V2SI
1313 (subreg:V2SI
1314 (match_operand:V2SF 1 "register_operand") 0)
1315 (match_dup 2)))]
1316 "TARGET_MMX_WITH_SSE"
1317 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (V2SFmode)-1);")
1318
1319 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1320 ;;
1321 ;; Parallel single-precision FMA multiply/accumulate instructions.
1322 ;;
1323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1324
1325 (define_expand "fmav2sf4"
1326 [(set (match_operand:V2SF 0 "register_operand")
1327 (fma:V2SF
1328 (match_operand:V2SF 1 "nonimmediate_operand")
1329 (match_operand:V2SF 2 "nonimmediate_operand")
1330 (match_operand:V2SF 3 "nonimmediate_operand")))]
1331 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
1332 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1333 {
1334 rtx op3 = gen_reg_rtx (V4SFmode);
1335 rtx op2 = gen_reg_rtx (V4SFmode);
1336 rtx op1 = gen_reg_rtx (V4SFmode);
1337 rtx op0 = gen_reg_rtx (V4SFmode);
1338
1339 emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
1340 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1341 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1342
1343 emit_insn (gen_fmav4sf4 (op0, op1, op2, op3));
1344
1345 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1346 DONE;
1347 })
1348
1349 (define_expand "fmsv2sf4"
1350 [(set (match_operand:V2SF 0 "register_operand")
1351 (fma:V2SF
1352 (match_operand:V2SF 1 "nonimmediate_operand")
1353 (match_operand:V2SF 2 "nonimmediate_operand")
1354 (neg:V2SF
1355 (match_operand:V2SF 3 "nonimmediate_operand"))))]
1356 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
1357 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1358 {
1359 rtx op3 = gen_reg_rtx (V4SFmode);
1360 rtx op2 = gen_reg_rtx (V4SFmode);
1361 rtx op1 = gen_reg_rtx (V4SFmode);
1362 rtx op0 = gen_reg_rtx (V4SFmode);
1363
1364 emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
1365 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1366 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1367
1368 emit_insn (gen_fmsv4sf4 (op0, op1, op2, op3));
1369
1370 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1371 DONE;
1372 })
1373
1374 (define_expand "fnmav2sf4"
1375 [(set (match_operand:V2SF 0 "register_operand")
1376 (fma:V2SF
1377 (neg:V2SF
1378 (match_operand:V2SF 1 "nonimmediate_operand"))
1379 (match_operand:V2SF 2 "nonimmediate_operand")
1380 (match_operand:V2SF 3 "nonimmediate_operand")))]
1381 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
1382 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1383 {
1384 rtx op3 = gen_reg_rtx (V4SFmode);
1385 rtx op2 = gen_reg_rtx (V4SFmode);
1386 rtx op1 = gen_reg_rtx (V4SFmode);
1387 rtx op0 = gen_reg_rtx (V4SFmode);
1388
1389 emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
1390 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1391 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1392
1393 emit_insn (gen_fnmav4sf4 (op0, op1, op2, op3));
1394
1395 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1396 DONE;
1397 })
1398
1399 (define_expand "fnmsv2sf4"
1400 [(set (match_operand:V2SF 0 "register_operand" "=v,v,x")
1401 (fma:V2SF
1402 (neg:V2SF
1403 (match_operand:V2SF 1 "nonimmediate_operand"))
1404 (match_operand:V2SF 2 "nonimmediate_operand")
1405 (neg:V2SF
1406 (match_operand:V2SF 3 "nonimmediate_operand"))))]
1407 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
1408 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1409 {
1410 rtx op3 = gen_reg_rtx (V4SFmode);
1411 rtx op2 = gen_reg_rtx (V4SFmode);
1412 rtx op1 = gen_reg_rtx (V4SFmode);
1413 rtx op0 = gen_reg_rtx (V4SFmode);
1414
1415 emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
1416 emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
1417 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1418
1419 emit_insn (gen_fnmsv4sf4 (op0, op1, op2, op3));
1420
1421 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1422 DONE;
1423 })
1424
1425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1426 ;;
1427 ;; Parallel single-precision floating point conversion operations
1428 ;;
1429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1430
1431 (define_expand "fix_truncv2sfv2si2"
1432 [(set (match_operand:V2SI 0 "register_operand")
1433 (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand")))]
1434 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1435 {
1436 rtx op1 = gen_reg_rtx (V4SFmode);
1437 rtx op0 = gen_reg_rtx (V4SImode);
1438
1439 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1440
1441 emit_insn (gen_fix_truncv4sfv4si2 (op0, op1));
1442
1443 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1444 DONE;
1445 })
1446
1447 (define_expand "fixuns_truncv2sfv2si2"
1448 [(set (match_operand:V2SI 0 "register_operand")
1449 (unsigned_fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand")))]
1450 "TARGET_AVX512VL && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1451 {
1452 rtx op1 = gen_reg_rtx (V4SFmode);
1453 rtx op0 = gen_reg_rtx (V4SImode);
1454
1455 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1456
1457 emit_insn (gen_fixuns_truncv4sfv4si2 (op0, op1));
1458
1459 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1460 DONE;
1461 })
1462
1463 (define_insn "mmx_fix_truncv2sfv2si2"
1464 [(set (match_operand:V2SI 0 "register_operand" "=y")
1465 (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
1466 "TARGET_3DNOW"
1467 "pf2id\t{%1, %0|%0, %1}"
1468 [(set_attr "type" "mmxcvt")
1469 (set_attr "prefix_extra" "1")
1470 (set_attr "mode" "V2SF")])
1471
1472 (define_expand "floatv2siv2sf2"
1473 [(set (match_operand:V2SF 0 "register_operand")
1474 (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand")))]
1475 "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1476 {
1477 rtx op1 = gen_reg_rtx (V4SImode);
1478 rtx op0 = gen_reg_rtx (V4SFmode);
1479
1480 emit_insn (gen_movq_v2si_to_sse (op1, operands[1]));
1481
1482 emit_insn (gen_floatv4siv4sf2 (op0, op1));
1483
1484 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1485 DONE;
1486 })
1487
1488 (define_expand "floatunsv2siv2sf2"
1489 [(set (match_operand:V2SF 0 "register_operand")
1490 (unsigned_float:V2SF (match_operand:V2SI 1 "nonimmediate_operand")))]
1491 "TARGET_AVX512VL && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1492 {
1493 rtx op1 = gen_reg_rtx (V4SImode);
1494 rtx op0 = gen_reg_rtx (V4SFmode);
1495
1496 emit_insn (gen_movq_v2si_to_sse (op1, operands[1]));
1497
1498 emit_insn (gen_floatunsv4siv4sf2 (op0, op1));
1499
1500 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1501 DONE;
1502 })
1503
1504 (define_insn "mmx_floatv2siv2sf2"
1505 [(set (match_operand:V2SF 0 "register_operand" "=y")
1506 (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
1507 "TARGET_3DNOW"
1508 "pi2fd\t{%1, %0|%0, %1}"
1509 [(set_attr "type" "mmxcvt")
1510 (set_attr "prefix_extra" "1")
1511 (set_attr "mode" "V2SF")])
1512
1513 (define_insn "mmx_pf2iw"
1514 [(set (match_operand:V2SI 0 "register_operand" "=y")
1515 (sign_extend:V2SI
1516 (ss_truncate:V2HI
1517 (fix:V2SI
1518 (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
1519 "TARGET_3DNOW_A"
1520 "pf2iw\t{%1, %0|%0, %1}"
1521 [(set_attr "type" "mmxcvt")
1522 (set_attr "prefix_extra" "1")
1523 (set_attr "mode" "V2SF")])
1524
1525 (define_insn "mmx_pi2fw"
1526 [(set (match_operand:V2SF 0 "register_operand" "=y")
1527 (float:V2SF
1528 (sign_extend:V2SI
1529 (truncate:V2HI
1530 (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))]
1531 "TARGET_3DNOW_A"
1532 "pi2fw\t{%1, %0|%0, %1}"
1533 [(set_attr "type" "mmxcvt")
1534 (set_attr "prefix_extra" "1")
1535 (set_attr "mode" "V2SF")])
1536
1537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1538 ;;
1539 ;; Parallel single-precision floating point element swizzling
1540 ;;
1541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1542
1543 (define_insn "mmx_pswapdv2sf2"
1544 [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
1545 (vec_select:V2SF
1546 (match_operand:V2SF 1 "register_mmxmem_operand" "ym,0,Yv")
1547 (parallel [(const_int 1) (const_int 0)])))]
1548 "TARGET_3DNOW_A || TARGET_MMX_WITH_SSE"
1549 "@
1550 pswapd\t{%1, %0|%0, %1}
1551 shufps\t{$0xe1, %1, %0|%0, %1, 0xe1}
1552 vshufps\t{$0xe1, %1, %1, %0|%0, %1, %1, 0xe1}"
1553 [(set_attr "isa" "*,sse_noavx,avx")
1554 (set_attr "mmx_isa" "native,*,*")
1555 (set_attr "type" "mmxcvt,ssemov,ssemov")
1556 (set_attr "prefix_extra" "1,*,*")
1557 (set_attr "mode" "V2SF,V4SF,V4SF")])
1558
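;; In the shufps alternatives in this section only the low two result
;; elements matter: immediate 0xe1 swaps them, 0xe5 duplicates element 1,
;; and 0xe0 duplicates element 0.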
1559 (define_insn "*mmx_movshdup"
1560 [(set (match_operand:V2SF 0 "register_operand" "=v,x")
1561 (vec_select:V2SF
1562 (match_operand:V2SF 1 "register_operand" "v,0")
1563 (parallel [(const_int 1) (const_int 1)])))]
1564 "TARGET_MMX_WITH_SSE"
1565 "@
1566 %vmovshdup\t{%1, %0|%0, %1}
1567 shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}"
1568 [(set_attr "isa" "sse3,*")
1569 (set_attr "type" "sse,sseshuf1")
1570 (set_attr "length_immediate" "*,1")
1571 (set_attr "prefix_rep" "1,*")
1572 (set_attr "prefix" "maybe_vex,orig")
1573 (set_attr "mode" "V4SF")])
1574
1575 (define_insn "*mmx_movsldup"
1576 [(set (match_operand:V2SF 0 "register_operand" "=v,x")
1577 (vec_select:V2SF
1578 (match_operand:V2SF 1 "register_operand" "v,0")
1579 (parallel [(const_int 0) (const_int 0)])))]
1580 "TARGET_MMX_WITH_SSE"
1581 "@
1582 %vmovsldup\t{%1, %0|%0, %1}
1583 shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
1584 [(set_attr "isa" "sse3,*")
1585 (set_attr "type" "sse,sseshuf1")
1586 (set_attr "length_immediate" "*,1")
1587 (set_attr "prefix_rep" "1,*")
1588 (set_attr "prefix" "maybe_vex,orig")
1589 (set_attr "mode" "V4SF")])
1590
1591 (define_insn_and_split "*vec_interleave_lowv2sf"
1592 [(set (match_operand:V2SF 0 "register_operand" "=x,v")
1593 (vec_select:V2SF
1594 (vec_concat:V4SF
1595 (match_operand:V2SF 1 "register_operand" "0,v")
1596 (match_operand:V2SF 2 "register_operand" "x,v"))
1597 (parallel [(const_int 0) (const_int 2)])))]
1598 "TARGET_MMX_WITH_SSE"
1599 "#"
1600 "&& reload_completed"
1601 [(const_int 0)]
1602 "ix86_split_mmx_punpck (operands, false); DONE;"
1603 [(set_attr "isa" "noavx,avx")
1604 (set_attr "type" "sselog")
1605 (set_attr "prefix" "orig,maybe_evex")
1606 (set_attr "mode" "V4SF")])
1607
1608 (define_insn_and_split "*vec_interleave_highv2sf"
1609 [(set (match_operand:V2SF 0 "register_operand" "=x,v")
1610 (vec_select:V2SF
1611 (vec_concat:V4SF
1612 (match_operand:V2SF 1 "register_operand" "0,v")
1613 (match_operand:V2SF 2 "register_operand" "x,v"))
1614 (parallel [(const_int 1) (const_int 3)])))]
1615 "TARGET_MMX_WITH_SSE"
1616 "#"
1617 "&& reload_completed"
1618 [(const_int 0)]
1619 "ix86_split_mmx_punpck (operands, true); DONE;"
1620 [(set_attr "isa" "noavx,avx")
1621 (set_attr "type" "sselog")
1622 (set_attr "prefix" "orig,vex")
1623 (set_attr "mode" "V4SF")])
1624
1625 (define_insn "*vec_dupv2sf"
1626 [(set (match_operand:V2SF 0 "register_operand" "=y,Yv,x")
1627 (vec_duplicate:V2SF
1628 (match_operand:SF 1 "register_operand" "0,Yv,0")))]
1629 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1630 "@
1631 punpckldq\t%0, %0
1632 %vmovsldup\t{%1, %0|%0, %1}
1633 shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
1634 [(set_attr "isa" "*,sse3,sse_noavx")
1635 (set_attr "mmx_isa" "native,*,*")
1636 (set_attr "type" "mmxcvt,sse,sseshuf1")
1637 (set_attr "length_immediate" "*,*,1")
1638 (set_attr "prefix_rep" "*,1,*")
1639 (set_attr "prefix" "*,maybe_vex,orig")
1640 (set_attr "mode" "DI,V4SF,V4SF")])
1641
1642 (define_insn "*mmx_movss_<mode>"
1643 [(set (match_operand:V2FI 0 "register_operand" "=x,v")
1644 (vec_merge:V2FI
1645 (match_operand:V2FI 2 "register_operand" " x,v")
1646 (match_operand:V2FI 1 "register_operand" " 0,v")
1647 (const_int 1)))]
1648 "TARGET_MMX_WITH_SSE"
1649 "@
1650 movss\t{%2, %0|%0, %2}
1651 vmovss\t{%2, %1, %0|%0, %1, %2}"
1652 [(set_attr "isa" "noavx,avx")
1653 (set_attr "type" "ssemov")
1654 (set_attr "prefix" "orig,maybe_evex")
1655 (set_attr "mode" "SF")])
1656
1657 (define_insn "*mmx_concatv2sf"
1658 [(set (match_operand:V2SF 0 "register_operand" "=y,y")
1659 (vec_concat:V2SF
1660 (match_operand:SF 1 "nonimmediate_operand" " 0,rm")
1661 (match_operand:SF 2 "nonimm_or_0_operand" "ym,C")))]
1662 "TARGET_MMX && !TARGET_SSE"
1663 "@
1664 punpckldq\t{%2, %0|%0, %2}
1665 movd\t{%1, %0|%0, %1}"
1666 [(set_attr "type" "mmxcvt,mmxmov")
1667 (set_attr "mode" "DI")])
1668
1669 (define_expand "vec_setv2sf"
1670 [(match_operand:V2SF 0 "register_operand")
1671 (match_operand:SF 1 "register_operand")
1672 (match_operand 2 "vec_setm_mmx_operand")]
1673 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1674 {
1675 if (CONST_INT_P (operands[2]))
1676 ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
1677 INTVAL (operands[2]));
1678 else
1679 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
1680 DONE;
1681 })
1682
1683 ;; Avoid combining registers from different units in a single alternative,
1684 ;; see comment above inline_secondary_memory_needed function in i386.cc
1685 (define_insn_and_split "*vec_extractv2sf_0"
1686 [(set (match_operand:SF 0 "nonimmediate_operand" "=x, m,y ,m,f,r")
1687 (vec_select:SF
1688 (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
1689 (parallel [(const_int 0)])))]
1690 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1691 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1692 "#"
1693 "&& reload_completed"
1694 [(set (match_dup 0) (match_dup 1))]
1695 "operands[1] = gen_lowpart (SFmode, operands[1]);"
1696 [(set_attr "mmx_isa" "*,*,native,native,*,*")])
1697
1698 ;; Avoid combining registers from different units in a single alternative,
1699 ;; see comment above inline_secondary_memory_needed function in i386.cc
1700 (define_insn "*vec_extractv2sf_1"
1701 [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r")
1702 (vec_select:SF
1703 (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,0,o,o,o,o")
1704 (parallel [(const_int 1)])))]
1705 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1706 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1707 "@
1708 punpckhdq\t%0, %0
1709 %vmovshdup\t{%1, %0|%0, %1}
1710 shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}
1711 #
1712 #
1713 #
1714 #"
1715 [(set_attr "isa" "*,sse3,noavx,*,*,*,*")
1716 (set_attr "mmx_isa" "native,*,*,native,*,*,*")
1717 (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
1718 (set (attr "length_immediate")
1719 (if_then_else (eq_attr "alternative" "2")
1720 (const_string "1")
1721 (const_string "*")))
1722 (set (attr "prefix_rep")
1723 (if_then_else (eq_attr "alternative" "1")
1724 (const_string "1")
1725 (const_string "*")))
1726 (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig")
1727 (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")])
1728
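;; Extracting element 1 from a V2SF in memory is just an SFmode load from
;; offset 4; the split below rewrites the vec_select accordingly.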
1729 (define_split
1730 [(set (match_operand:SF 0 "register_operand")
1731 (vec_select:SF
1732 (match_operand:V2SF 1 "memory_operand")
1733 (parallel [(const_int 1)])))]
1734 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
1735 [(set (match_dup 0) (match_dup 1))]
1736 "operands[1] = adjust_address (operands[1], SFmode, 4);")
1737
1738 (define_expand "vec_extractv2sfsf"
1739 [(match_operand:SF 0 "register_operand")
1740 (match_operand:V2SF 1 "register_operand")
1741 (match_operand 2 "const_int_operand")]
1742 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1743 {
1744 ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
1745 operands[1], INTVAL (operands[2]));
1746 DONE;
1747 })
1748
1749 (define_expand "vec_initv2sfsf"
1750 [(match_operand:V2SF 0 "register_operand")
1751 (match_operand 1)]
1752 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
1753 {
1754 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
1755 operands[1]);
1756 DONE;
1757 })
1758
1759 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1760 ;;
1761 ;; Parallel single-precision floating point rounding operations.
1762 ;;
1763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1764
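;; The V2SF rounding expanders below work on the full 128-bit vector:
;; the 64-bit input is loaded into the low half of an XMM register with
;; movq_v2sf_to_sse, the corresponding V4SF insn performs the rounding,
;; and the low 64 bits of the result are extracted with lowpart_subreg.
;; All but nearbyint and rint are additionally gated on
;; !flag_trapping_math.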
1765 (define_expand "nearbyintv2sf2"
1766 [(match_operand:V2SF 0 "register_operand")
1767 (match_operand:V2SF 1 "nonimmediate_operand")]
1768 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1769 {
1770 rtx op1 = gen_reg_rtx (V4SFmode);
1771 rtx op0 = gen_reg_rtx (V4SFmode);
1772
1773 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1774
1775 emit_insn (gen_nearbyintv4sf2 (op0, op1));
1776
1777 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1778 DONE;
1779 })
1780
1781 (define_expand "rintv2sf2"
1782 [(match_operand:V2SF 0 "register_operand")
1783 (match_operand:V2SF 1 "nonimmediate_operand")]
1784 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1785 {
1786 rtx op1 = gen_reg_rtx (V4SFmode);
1787 rtx op0 = gen_reg_rtx (V4SFmode);
1788
1789 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1790
1791 emit_insn (gen_rintv4sf2 (op0, op1));
1792
1793 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1794 DONE;
1795 })
1796
1797 (define_expand "lrintv2sfv2si2"
1798 [(match_operand:V2SI 0 "register_operand")
1799 (match_operand:V2SF 1 "nonimmediate_operand")]
1800 "TARGET_SSE4_1 && !flag_trapping_math
1801 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1802 {
1803 rtx op1 = gen_reg_rtx (V4SFmode);
1804 rtx op0 = gen_reg_rtx (V4SImode);
1805
1806 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1807
1808 emit_insn (gen_lrintv4sfv4si2 (op0, op1));
1809
1810 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1811 DONE;
1812 })
1813
1814 (define_expand "ceilv2sf2"
1815 [(match_operand:V2SF 0 "register_operand")
1816 (match_operand:V2SF 1 "nonimmediate_operand")]
1817 "TARGET_SSE4_1 && !flag_trapping_math
1818 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1819 {
1820 rtx op1 = gen_reg_rtx (V4SFmode);
1821 rtx op0 = gen_reg_rtx (V4SFmode);
1822
1823 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1824
1825 emit_insn (gen_ceilv4sf2 (op0, op1));
1826
1827 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1828 DONE;
1829 })
1830
1831 (define_expand "lceilv2sfv2si2"
1832 [(match_operand:V2SI 0 "register_operand")
1833 (match_operand:V2SF 1 "nonimmediate_operand")]
1834 "TARGET_SSE4_1 && !flag_trapping_math
1835 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1836 {
1837 rtx op1 = gen_reg_rtx (V4SFmode);
1838 rtx op0 = gen_reg_rtx (V4SImode);
1839
1840 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1841
1842 emit_insn (gen_lceilv4sfv4si2 (op0, op1));
1843
1844 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1845 DONE;
1846 })
1847
1848 (define_expand "floorv2sf2"
1849 [(match_operand:V2SF 0 "register_operand")
1850 (match_operand:V2SF 1 "nonimmediate_operand")]
1851 "TARGET_SSE4_1 && !flag_trapping_math
1852 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1853 {
1854 rtx op1 = gen_reg_rtx (V4SFmode);
1855 rtx op0 = gen_reg_rtx (V4SFmode);
1856
1857 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1858
1859 emit_insn (gen_floorv4sf2 (op0, op1));
1860
1861 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1862 DONE;
1863 })
1864
1865 (define_expand "lfloorv2sfv2si2"
1866 [(match_operand:V2SI 0 "register_operand")
1867 (match_operand:V2SF 1 "nonimmediate_operand")]
1868 "TARGET_SSE4_1 && !flag_trapping_math
1869 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1870 {
1871 rtx op1 = gen_reg_rtx (V4SFmode);
1872 rtx op0 = gen_reg_rtx (V4SImode);
1873
1874 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1875
1876 emit_insn (gen_lfloorv4sfv4si2 (op0, op1));
1877
1878 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1879 DONE;
1880 })
1881
1882 (define_expand "btruncv2sf2"
1883 [(match_operand:V2SF 0 "register_operand")
1884 (match_operand:V2SF 1 "nonimmediate_operand")]
1885 "TARGET_SSE4_1 && !flag_trapping_math
1886 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1887 {
1888 rtx op1 = gen_reg_rtx (V4SFmode);
1889 rtx op0 = gen_reg_rtx (V4SFmode);
1890
1891 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1892
1893 emit_insn (gen_btruncv4sf2 (op0, op1));
1894
1895 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1896 DONE;
1897 })
1898
1899 (define_expand "roundv2sf2"
1900 [(match_operand:V2SF 0 "register_operand")
1901 (match_operand:V2SF 1 "nonimmediate_operand")]
1902 "TARGET_SSE4_1 && !flag_trapping_math
1903 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1904 {
1905 rtx op1 = gen_reg_rtx (V4SFmode);
1906 rtx op0 = gen_reg_rtx (V4SFmode);
1907
1908 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1909
1910 emit_insn (gen_roundv4sf2 (op0, op1));
1911
1912 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
1913 DONE;
1914 })
1915
1916 (define_expand "lroundv2sfv2si2"
1917 [(match_operand:V2SI 0 "register_operand")
1918 (match_operand:V2SF 1 "nonimmediate_operand")]
1919 "TARGET_SSE4_1 && !flag_trapping_math
1920 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
1921 {
1922 rtx op1 = gen_reg_rtx (V4SFmode);
1923 rtx op0 = gen_reg_rtx (V4SImode);
1924
1925 emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
1926
1927 emit_insn (gen_lroundv4sfv4si2 (op0, op1));
1928
1929 emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
1930 DONE;
1931 })
1932
1933 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1934 ;;
1935 ;; Parallel half-precision floating point arithmetic
1936 ;;
1937 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1938
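;; V4HF and V2HF arithmetic is carried out in V8HF registers: the narrow
;; operands are placed in the low lanes of an XMM register, the full V8HF
;; insn is emitted, and the low part of the result is taken back with
;; lowpart_subreg.  For division the unused divisor lanes are filled with
;; 1.0 instead of 0.0 so the widened divide cannot raise a spurious
;; divide-by-zero exception on the padding elements.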
1939 (define_expand "<insn>v4hf3"
1940 [(set (match_operand:V4HF 0 "register_operand")
1941 (plusminusmult:V4HF
1942 (match_operand:V4HF 1 "nonimmediate_operand")
1943 (match_operand:V4HF 2 "nonimmediate_operand")))]
1944 "TARGET_AVX512FP16 && TARGET_AVX512VL"
1945 {
1946 rtx op2 = gen_reg_rtx (V8HFmode);
1947 rtx op1 = gen_reg_rtx (V8HFmode);
1948 rtx op0 = gen_reg_rtx (V8HFmode);
1949
1950 emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
1951 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
1952
1953 emit_insn (gen_<insn>v8hf3 (op0, op1, op2));
1954
1955 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
1956 DONE;
1957 })
1958
1959 (define_expand "divv4hf3"
1960 [(set (match_operand:V4HF 0 "register_operand")
1961 (div:V4HF
1962 (match_operand:V4HF 1 "nonimmediate_operand")
1963 (match_operand:V4HF 2 "nonimmediate_operand")))]
1964 "TARGET_AVX512FP16 && TARGET_AVX512VL"
1965 {
1966 rtx op2 = gen_reg_rtx (V8HFmode);
1967 rtx op1 = gen_reg_rtx (V8HFmode);
1968 rtx op0 = gen_reg_rtx (V8HFmode);
1969
1970 emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
1971 rtx tmp = gen_rtx_VEC_CONCAT (V8HFmode, operands[2],
1972 force_reg (V4HFmode, CONST1_RTX (V4HFmode)));
1973 emit_insn (gen_rtx_SET (op2, tmp));
1974 emit_insn (gen_divv8hf3 (op0, op1, op2));
1975 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
1976 DONE;
1977 })
1978
1979 (define_expand "movd_v2hf_to_sse"
1980 [(set (match_operand:V8HF 0 "register_operand")
1981 (vec_merge:V8HF
1982 (vec_duplicate:V8HF
1983 (match_operand:V2HF 1 "nonimmediate_operand"))
1984 (match_operand:V8HF 2 "reg_or_0_operand")
1985 (const_int 3)))]
1986 "TARGET_SSE")
1987
1988 (define_expand "<insn>v2hf3"
1989 [(set (match_operand:V2HF 0 "register_operand")
1990 (plusminusmult:V2HF
1991 (match_operand:V2HF 1 "nonimmediate_operand")
1992 (match_operand:V2HF 2 "nonimmediate_operand")))]
1993 "TARGET_AVX512FP16 && TARGET_AVX512VL"
1994 {
1995 rtx op2 = gen_reg_rtx (V8HFmode);
1996 rtx op1 = gen_reg_rtx (V8HFmode);
1997 rtx op0 = gen_reg_rtx (V8HFmode);
1998
1999 emit_insn (gen_movd_v2hf_to_sse (op2, operands[2], CONST0_RTX (V8HFmode)));
2000 emit_insn (gen_movd_v2hf_to_sse (op1, operands[1], CONST0_RTX (V8HFmode)));
2001 emit_insn (gen_<insn>v8hf3 (op0, op1, op2));
2002
2003 emit_move_insn (operands[0], lowpart_subreg (V2HFmode, op0, V8HFmode));
2004 DONE;
2005 })
2006
2007 (define_expand "divv2hf3"
2008 [(set (match_operand:V2HF 0 "register_operand")
2009 (div:V2HF
2010 (match_operand:V2HF 1 "nonimmediate_operand")
2011 (match_operand:V2HF 2 "nonimmediate_operand")))]
2012 "TARGET_AVX512FP16 && TARGET_AVX512VL"
2013 {
2014 rtx op2 = gen_reg_rtx (V8HFmode);
2015 rtx op1 = gen_reg_rtx (V8HFmode);
2016 rtx op0 = gen_reg_rtx (V8HFmode);
2017
2018 emit_insn (gen_movd_v2hf_to_sse (op2, operands[2],
2019 force_reg (V8HFmode, CONST1_RTX (V8HFmode))));
2020 emit_insn (gen_movd_v2hf_to_sse (op1, operands[1], CONST0_RTX (V8HFmode)));
2021 emit_insn (gen_divv8hf3 (op0, op1, op2));
2022
2023 emit_move_insn (operands[0], lowpart_subreg (V2HFmode, op0, V8HFmode));
2024 DONE;
2025 })
2026
2027
2028 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2029 ;;
2030 ;; Parallel integral arithmetic
2031 ;;
2032 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2033
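;; As an illustrative sketch only (not part of this file), GNU C generic
;; vectors map directly onto these patterns on 64-bit targets where
;; TARGET_MMX_WITH_SSE holds, e.g.:
;;
;;   typedef short v4hi __attribute__ ((vector_size (8)));
;;   v4hi add_v4hi (v4hi a, v4hi b) { return a + b; }
;;
;; expands through the add/sub patterns below and ends up as a paddw
;; operating on an SSE register.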
2034 (define_expand "neg<mode>2"
2035 [(set (match_operand:MMXMODEI 0 "register_operand")
2036 (minus:MMXMODEI
2037 (match_dup 2)
2038 (match_operand:MMXMODEI 1 "register_operand")))]
2039 "TARGET_MMX_WITH_SSE"
2040 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2041
2042 (define_expand "neg<mode>2"
2043 [(set (match_operand:VI_32 0 "register_operand")
2044 (minus:VI_32
2045 (match_dup 2)
2046 (match_operand:VI_32 1 "register_operand")))]
2047 "TARGET_SSE2"
2048 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2049
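;; V2QI operations are split after reload: in general registers the low
;; byte is handled with a strict_low_part QImode operation and the high
;; byte through a zero_extract of the containing HImode register, while
;; in SSE registers the corresponding full V16QI operation is used on the
;; low bytes.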
2050 (define_insn "negv2qi2"
2051 [(set (match_operand:V2QI 0 "register_operand" "=?Q,&Yw")
2052 (neg:V2QI
2053 (match_operand:V2QI 1 "register_operand" "0,Yw")))
2054 (clobber (reg:CC FLAGS_REG))]
2055 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
2056 "#"
2057 [(set_attr "isa" "*,sse2")
2058 (set_attr "type" "multi")
2059 (set_attr "mode" "QI,TI")])
2060
2061 (define_split
2062 [(set (match_operand:V2QI 0 "general_reg_operand")
2063 (neg:V2QI
2064 (match_operand:V2QI 1 "general_reg_operand")))
2065 (clobber (reg:CC FLAGS_REG))]
2066 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
2067 && reload_completed"
2068 [(parallel
2069 [(set (strict_low_part (match_dup 0))
2070 (neg:QI (match_dup 1)))
2071 (clobber (reg:CC FLAGS_REG))])
2072 (parallel
2073 [(set (zero_extract:HI (match_dup 2) (const_int 8) (const_int 8))
2074 (subreg:HI
2075 (neg:QI
2076 (subreg:QI
2077 (zero_extract:HI (match_dup 3)
2078 (const_int 8)
2079 (const_int 8)) 0)) 0))
2080 (clobber (reg:CC FLAGS_REG))])]
2081 {
2082 operands[3] = lowpart_subreg (HImode, operands[1], V2QImode);
2083 operands[2] = lowpart_subreg (HImode, operands[0], V2QImode);
2084 operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
2085 operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
2086 })
2087
2088 (define_split
2089 [(set (match_operand:V2QI 0 "sse_reg_operand")
2090 (neg:V2QI
2091 (match_operand:V2QI 1 "sse_reg_operand")))
2092 (clobber (reg:CC FLAGS_REG))]
2093 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
2094 && TARGET_SSE2 && reload_completed"
2095 [(set (match_dup 0) (match_dup 2))
2096 (set (match_dup 0)
2097 (minus:V16QI (match_dup 0) (match_dup 1)))]
2098 {
2099 operands[2] = CONST0_RTX (V16QImode);
2100 operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
2101 operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
2102 })
2103
2104 (define_expand "mmx_<insn><mode>3"
2105 [(set (match_operand:MMXMODEI8 0 "register_operand")
2106 (plusminus:MMXMODEI8
2107 (match_operand:MMXMODEI8 1 "register_mmxmem_operand")
2108 (match_operand:MMXMODEI8 2 "register_mmxmem_operand")))]
2109 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2110 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2111
2112 (define_expand "<insn><mode>3"
2113 [(set (match_operand:MMXMODEI 0 "register_operand")
2114 (plusminus:MMXMODEI
2115 (match_operand:MMXMODEI 1 "register_operand")
2116 (match_operand:MMXMODEI 2 "register_operand")))]
2117 "TARGET_MMX_WITH_SSE")
2118
2119 (define_insn "*mmx_<insn><mode>3"
2120 [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,<Yv_Yw>")
2121 (plusminus:MMXMODEI8
2122 (match_operand:MMXMODEI8 1 "register_mmxmem_operand"
2123 "<comm>0,0,<Yv_Yw>")
2124 (match_operand:MMXMODEI8 2 "register_mmxmem_operand"
2125 "ym,x,<Yv_Yw>")))]
2126 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2127 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2128 "@
2129 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2130 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2131 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2132 [(set_attr "isa" "*,sse2_noavx,avx")
2133 (set_attr "mmx_isa" "native,*,*")
2134 (set_attr "type" "mmxadd,sseadd,sseadd")
2135 (set_attr "mode" "DI,TI,TI")])
2136
2137 (define_insn "<insn><mode>3"
2138 [(set (match_operand:VI_32 0 "register_operand" "=x,Yw")
2139 (plusminus:VI_32
2140 (match_operand:VI_32 1 "register_operand" "<comm>0,Yw")
2141 (match_operand:VI_32 2 "register_operand" "x,Yw")))]
2142 "TARGET_SSE2"
2143 "@
2144 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2145 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2146 [(set_attr "isa" "noavx,avx")
2147 (set_attr "type" "sseadd")
2148 (set_attr "mode" "TI")])
2149
2150 (define_insn "<insn>v2qi3"
2151 [(set (match_operand:V2QI 0 "register_operand" "=?Q,x,Yw")
2152 (plusminus:V2QI
2153 (match_operand:V2QI 1 "register_operand" "<comm>0,0,Yw")
2154 (match_operand:V2QI 2 "register_operand" "Q,x,Yw")))
2155 (clobber (reg:CC FLAGS_REG))]
2156 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
2157 "#"
2158 [(set_attr "isa" "*,sse2_noavx,avx")
2159 (set_attr "type" "multi,sseadd,sseadd")
2160 (set_attr "mode" "QI,TI,TI")])
2161
2162 (define_split
2163 [(set (match_operand:V2QI 0 "general_reg_operand")
2164 (plusminus:V2QI
2165 (match_operand:V2QI 1 "general_reg_operand")
2166 (match_operand:V2QI 2 "general_reg_operand")))
2167 (clobber (reg:CC FLAGS_REG))]
2168 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
2169 && reload_completed"
2170 [(parallel
2171 [(set (strict_low_part (match_dup 0))
2172 (plusminus:QI (match_dup 1) (match_dup 2)))
2173 (clobber (reg:CC FLAGS_REG))])
2174 (parallel
2175 [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
2176 (subreg:HI
2177 (plusminus:QI
2178 (subreg:QI
2179 (zero_extract:HI (match_dup 4)
2180 (const_int 8)
2181 (const_int 8)) 0)
2182 (subreg:QI
2183 (zero_extract:HI (match_dup 5)
2184 (const_int 8)
2185 (const_int 8)) 0)) 0))
2186 (clobber (reg:CC FLAGS_REG))])]
2187 {
2188 operands[5] = lowpart_subreg (HImode, operands[2], V2QImode);
2189 operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
2190 operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
2191 operands[2] = lowpart_subreg (QImode, operands[2], V2QImode);
2192 operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
2193 operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
2194 })
2195
2196 (define_split
2197 [(set (match_operand:V2QI 0 "sse_reg_operand")
2198 (plusminus:V2QI
2199 (match_operand:V2QI 1 "sse_reg_operand")
2200 (match_operand:V2QI 2 "sse_reg_operand")))
2201 (clobber (reg:CC FLAGS_REG))]
2202 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
2203 && TARGET_SSE2 && reload_completed"
2204 [(set (match_dup 0)
2205 (plusminus:V16QI (match_dup 1) (match_dup 2)))]
2206 {
2207 operands[2] = lowpart_subreg (V16QImode, operands[2], V2QImode);
2208 operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
2209 operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
2210 })
2211
2212 (define_expand "mmx_<insn><mode>3"
2213 [(set (match_operand:MMXMODE12 0 "register_operand")
2214 (sat_plusminus:MMXMODE12
2215 (match_operand:MMXMODE12 1 "register_mmxmem_operand")
2216 (match_operand:MMXMODE12 2 "register_mmxmem_operand")))]
2217 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2218 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2219
2220 (define_insn "*mmx_<insn><mode>3"
2221 [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yw")
2222 (sat_plusminus:MMXMODE12
2223 (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yw")
2224 (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yw")))]
2225 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2226 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2227 "@
2228 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2229 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2230 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2231 [(set_attr "isa" "*,sse2_noavx,avx")
2232 (set_attr "mmx_isa" "native,*,*")
2233 (set_attr "type" "mmxadd,sseadd,sseadd")
2234 (set_attr "mode" "DI,TI,TI")])
2235
2236 (define_insn "*<insn><mode>3"
2237 [(set (match_operand:VI_16_32 0 "register_operand" "=x,Yw")
2238 (sat_plusminus:VI_16_32
2239 (match_operand:VI_16_32 1 "register_operand" "<comm>0,Yw")
2240 (match_operand:VI_16_32 2 "register_operand" "x,Yw")))]
2241 "TARGET_SSE2"
2242 "@
2243 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
2244 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2245 [(set_attr "isa" "noavx,avx")
2246 (set_attr "type" "sseadd")
2247 (set_attr "mode" "TI")])
2248
2249 (define_insn "mulv2si3"
2250 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
2251 (mult:V2SI
2252 (match_operand:V2SI 1 "register_operand" "%0,0,v")
2253 (match_operand:V2SI 2 "register_operand" "Yr,*x,v")))]
2254 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
2255 "@
2256 pmulld\t{%2, %0|%0, %2}
2257 pmulld\t{%2, %0|%0, %2}
2258 vpmulld\t{%2, %1, %0|%0, %1, %2}"
2259 [(set_attr "isa" "noavx,noavx,avx")
2260 (set_attr "type" "sseimul")
2261 (set_attr "prefix_extra" "1")
2262 (set_attr "prefix" "orig,orig,vex")
2263 (set_attr "btver2_decode" "vector")
2264 (set_attr "mode" "TI")])
2265
2266 (define_expand "mmx_mulv4hi3"
2267 [(set (match_operand:V4HI 0 "register_operand")
2268 (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
2269 (match_operand:V4HI 2 "register_mmxmem_operand")))]
2270 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2271 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
2272
2273 (define_expand "mulv4hi3"
2274 [(set (match_operand:V4HI 0 "register_operand")
2275 (mult:V4HI (match_operand:V4HI 1 "register_operand")
2276 (match_operand:V4HI 2 "register_operand")))]
2277 "TARGET_MMX_WITH_SSE")
2278
2279 (define_insn "*mmx_mulv4hi3"
2280 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
2281 (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
2282 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
2283 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2284 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
2285 "@
2286 pmullw\t{%2, %0|%0, %2}
2287 pmullw\t{%2, %0|%0, %2}
2288 vpmullw\t{%2, %1, %0|%0, %1, %2}"
2289 [(set_attr "isa" "*,sse2_noavx,avx")
2290 (set_attr "mmx_isa" "native,*,*")
2291 (set_attr "type" "mmxmul,ssemul,ssemul")
2292 (set_attr "mode" "DI,TI,TI")])
2293
2294 (define_insn "mulv2hi3"
2295 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
2296 (mult:V2HI (match_operand:V2HI 1 "register_operand" "%0,Yw")
2297 (match_operand:V2HI 2 "register_operand" "x,Yw")))]
2298 "TARGET_SSE2"
2299 "@
2300 pmullw\t{%2, %0|%0, %2}
2301 vpmullw\t{%2, %1, %0|%0, %1, %2}"
2302 [(set_attr "isa" "noavx,avx")
2303 (set_attr "type" "ssemul")
2304 (set_attr "mode" "TI")])
2305
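;; There is no vector byte multiply instruction, so V8QI and V4QI
;; multiplication is expanded by ix86_expand_vecop_qihi_partial, which
;; performs the operation on HImode elements and narrows the result.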
2306 (define_expand "mulv8qi3"
2307 [(set (match_operand:V8QI 0 "register_operand")
2308 (mult:V8QI (match_operand:V8QI 1 "register_operand")
2309 (match_operand:V8QI 2 "register_operand")))]
2310 "TARGET_MMX_WITH_SSE"
2311 {
2312 ix86_expand_vecop_qihi_partial (MULT, operands[0], operands[1], operands[2]);
2313 DONE;
2314 })
2315
2316 (define_expand "mulv4qi3"
2317 [(set (match_operand:V4QI 0 "register_operand")
2318 (mult:V4QI (match_operand:V4QI 1 "register_operand")
2319 (match_operand:V4QI 2 "register_operand")))]
2320 "TARGET_SSE2"
2321 {
2322 ix86_expand_vecop_qihi_partial (MULT, operands[0], operands[1], operands[2]);
2323 DONE;
2324 })
2325
2326 (define_expand "mmx_smulv4hi3_highpart"
2327 [(set (match_operand:V4HI 0 "register_operand")
2328 (truncate:V4HI
2329 (lshiftrt:V4SI
2330 (mult:V4SI
2331 (sign_extend:V4SI
2332 (match_operand:V4HI 1 "register_mmxmem_operand"))
2333 (sign_extend:V4SI
2334 (match_operand:V4HI 2 "register_mmxmem_operand")))
2335 (const_int 16))))]
2336 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2337 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
2338
2339 (define_insn "*mmx_smulv4hi3_highpart"
2340 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
2341 (truncate:V4HI
2342 (lshiftrt:V4SI
2343 (mult:V4SI
2344 (sign_extend:V4SI
2345 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
2346 (sign_extend:V4SI
2347 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
2348 (const_int 16))))]
2349 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2350 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
2351 "@
2352 pmulhw\t{%2, %0|%0, %2}
2353 pmulhw\t{%2, %0|%0, %2}
2354 vpmulhw\t{%2, %1, %0|%0, %1, %2}"
2355 [(set_attr "isa" "*,sse2_noavx,avx")
2356 (set_attr "mmx_isa" "native,*,*")
2357 (set_attr "type" "mmxmul,ssemul,ssemul")
2358 (set_attr "mode" "DI,TI,TI")])
2359
2360 (define_expand "mmx_umulv4hi3_highpart"
2361 [(set (match_operand:V4HI 0 "register_operand")
2362 (truncate:V4HI
2363 (lshiftrt:V4SI
2364 (mult:V4SI
2365 (zero_extend:V4SI
2366 (match_operand:V4HI 1 "register_mmxmem_operand"))
2367 (zero_extend:V4SI
2368 (match_operand:V4HI 2 "register_mmxmem_operand")))
2369 (const_int 16))))]
2370 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2371 && (TARGET_SSE || TARGET_3DNOW_A)"
2372 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
2373
2374 (define_insn "*mmx_umulv4hi3_highpart"
2375 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
2376 (truncate:V4HI
2377 (lshiftrt:V4SI
2378 (mult:V4SI
2379 (zero_extend:V4SI
2380 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
2381 (zero_extend:V4SI
2382 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
2383 (const_int 16))))]
2384 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2385 && (TARGET_SSE || TARGET_3DNOW_A)
2386 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
2387 "@
2388 pmulhuw\t{%2, %0|%0, %2}
2389 pmulhuw\t{%2, %0|%0, %2}
2390 vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
2391 [(set_attr "isa" "*,sse2_noavx,avx")
2392 (set_attr "mmx_isa" "native,*,*")
2393 (set_attr "type" "mmxmul,ssemul,ssemul")
2394 (set_attr "mode" "DI,TI,TI")])
2395
2396 (define_expand "<s>mulv4hi3_highpart"
2397 [(set (match_operand:V4HI 0 "register_operand")
2398 (truncate:V4HI
2399 (lshiftrt:V4SI
2400 (mult:V4SI
2401 (any_extend:V4SI
2402 (match_operand:V4HI 1 "register_operand"))
2403 (any_extend:V4SI
2404 (match_operand:V4HI 2 "register_operand")))
2405 (const_int 16))))]
2406 "TARGET_MMX_WITH_SSE")
2407
2408 (define_insn "<s>mulv2hi3_highpart"
2409 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
2410 (truncate:V2HI
2411 (lshiftrt:V2SI
2412 (mult:V2SI
2413 (any_extend:V2SI
2414 (match_operand:V2HI 1 "register_operand" "%0,Yw"))
2415 (any_extend:V2SI
2416 (match_operand:V2HI 2 "register_operand" "x,Yw")))
2417 (const_int 16))))]
2418 "TARGET_SSE2"
2419 "@
2420 pmulh<u>w\t{%2, %0|%0, %2}
2421 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
2422 [(set_attr "isa" "noavx,avx")
2423 (set_attr "type" "ssemul")
2424 (set_attr "mode" "TI")])
2425
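;; pmaddwd is modelled as two V2SI partial products that are then summed:
;; each doubleword of the result is op1[2i]*op2[2i] + op1[2i+1]*op2[2i+1]
;; with the halfword inputs sign-extended to SImode.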
2426 (define_expand "mmx_pmaddwd"
2427 [(set (match_operand:V2SI 0 "register_operand")
2428 (plus:V2SI
2429 (mult:V2SI
2430 (sign_extend:V2SI
2431 (vec_select:V2HI
2432 (match_operand:V4HI 1 "register_mmxmem_operand")
2433 (parallel [(const_int 0) (const_int 2)])))
2434 (sign_extend:V2SI
2435 (vec_select:V2HI
2436 (match_operand:V4HI 2 "register_mmxmem_operand")
2437 (parallel [(const_int 0) (const_int 2)]))))
2438 (mult:V2SI
2439 (sign_extend:V2SI
2440 (vec_select:V2HI (match_dup 1)
2441 (parallel [(const_int 1) (const_int 3)])))
2442 (sign_extend:V2SI
2443 (vec_select:V2HI (match_dup 2)
2444 (parallel [(const_int 1) (const_int 3)]))))))]
2445 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2446 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
2447
2448 (define_insn "*mmx_pmaddwd"
2449 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yw")
2450 (plus:V2SI
2451 (mult:V2SI
2452 (sign_extend:V2SI
2453 (vec_select:V2HI
2454 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
2455 (parallel [(const_int 0) (const_int 2)])))
2456 (sign_extend:V2SI
2457 (vec_select:V2HI
2458 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")
2459 (parallel [(const_int 0) (const_int 2)]))))
2460 (mult:V2SI
2461 (sign_extend:V2SI
2462 (vec_select:V2HI (match_dup 1)
2463 (parallel [(const_int 1) (const_int 3)])))
2464 (sign_extend:V2SI
2465 (vec_select:V2HI (match_dup 2)
2466 (parallel [(const_int 1) (const_int 3)]))))))]
2467 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2468 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
2469 "@
2470 pmaddwd\t{%2, %0|%0, %2}
2471 pmaddwd\t{%2, %0|%0, %2}
2472 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
2473 [(set_attr "isa" "*,sse2_noavx,avx")
2474 (set_attr "mmx_isa" "native,*,*")
2475 (set_attr "type" "mmxmul,sseiadd,sseiadd")
2476 (set_attr "mode" "DI,TI,TI")])
2477
2478 (define_expand "mmx_pmulhrwv4hi3"
2479 [(set (match_operand:V4HI 0 "register_operand")
2480 (truncate:V4HI
2481 (lshiftrt:V4SI
2482 (plus:V4SI
2483 (mult:V4SI
2484 (sign_extend:V4SI
2485 (match_operand:V4HI 1 "nonimmediate_operand"))
2486 (sign_extend:V4SI
2487 (match_operand:V4HI 2 "nonimmediate_operand")))
2488 (const_vector:V4SI [(const_int 32768) (const_int 32768)
2489 (const_int 32768) (const_int 32768)]))
2490 (const_int 16))))]
2491 "TARGET_3DNOW"
2492 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
2493
2494 (define_insn "*mmx_pmulhrwv4hi3"
2495 [(set (match_operand:V4HI 0 "register_operand" "=y")
2496 (truncate:V4HI
2497 (lshiftrt:V4SI
2498 (plus:V4SI
2499 (mult:V4SI
2500 (sign_extend:V4SI
2501 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
2502 (sign_extend:V4SI
2503 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
2504 (const_vector:V4SI [(const_int 32768) (const_int 32768)
2505 (const_int 32768) (const_int 32768)]))
2506 (const_int 16))))]
2507 "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)"
2508 "pmulhrw\t{%2, %0|%0, %2}"
2509 [(set_attr "type" "mmxmul")
2510 (set_attr "prefix_extra" "1")
2511 (set_attr "mode" "DI")])
2512
2513 (define_expand "sse2_umulv1siv1di3"
2514 [(set (match_operand:V1DI 0 "register_operand")
2515 (mult:V1DI
2516 (zero_extend:V1DI
2517 (vec_select:V1SI
2518 (match_operand:V2SI 1 "register_mmxmem_operand")
2519 (parallel [(const_int 0)])))
2520 (zero_extend:V1DI
2521 (vec_select:V1SI
2522 (match_operand:V2SI 2 "register_mmxmem_operand")
2523 (parallel [(const_int 0)])))))]
2524 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
2525 "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
2526
2527 (define_insn "*sse2_umulv1siv1di3"
2528 [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
2529 (mult:V1DI
2530 (zero_extend:V1DI
2531 (vec_select:V1SI
2532 (match_operand:V2SI 1 "register_mmxmem_operand" "%0,0,Yv")
2533 (parallel [(const_int 0)])))
2534 (zero_extend:V1DI
2535 (vec_select:V1SI
2536 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
2537 (parallel [(const_int 0)])))))]
2538 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2539 && TARGET_SSE2
2540 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
2541 "@
2542 pmuludq\t{%2, %0|%0, %2}
2543 pmuludq\t{%2, %0|%0, %2}
2544 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
2545 [(set_attr "isa" "*,sse2_noavx,avx")
2546 (set_attr "mmx_isa" "native,*,*")
2547 (set_attr "type" "mmxmul,ssemul,ssemul")
2548 (set_attr "mode" "DI,TI,TI")])
2549
2550 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2551 ;;
2552 ;; Parallel integral min/max and absolute value operations
2553 ;;
2554 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2555
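;; Signed word and unsigned byte min/max (pmaxsw/pminsw, pmaxub/pminub)
;; were already part of the MMX extensions, so those patterns only require
;; TARGET_SSE || TARGET_3DNOW_A; the remaining element sizes and
;; signednesses need the SSE4.1 instructions.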
2556 (define_insn "<code><mode>3"
2557 [(set (match_operand:MMXMODE14 0 "register_operand" "=Yr,*x,Yv")
2558 (smaxmin:MMXMODE14
2559 (match_operand:MMXMODE14 1 "register_operand" "%0,0,Yv")
2560 (match_operand:MMXMODE14 2 "register_operand" "Yr,*x,Yv")))]
2561 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
2562 "@
2563 p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2}
2564 p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2}
2565 vp<maxmin_int><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2566 [(set_attr "isa" "noavx,noavx,avx")
2567 (set_attr "type" "sseiadd")
2568 (set_attr "prefix_extra" "1")
2569 (set_attr "prefix" "orig,orig,vex")
2570 (set_attr "mode" "TI")])
2571
2572 (define_expand "mmx_<code>v4hi3"
2573 [(set (match_operand:V4HI 0 "register_operand")
2574 (smaxmin:V4HI
2575 (match_operand:V4HI 1 "register_mmxmem_operand")
2576 (match_operand:V4HI 2 "register_mmxmem_operand")))]
2577 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2578 && (TARGET_SSE || TARGET_3DNOW_A)"
2579 "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
2580
2581 (define_insn "*mmx_<code>v4hi3"
2582 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
2583 (smaxmin:V4HI
2584 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
2585 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
2586 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2587 && (TARGET_SSE || TARGET_3DNOW_A)
2588 && ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
2589 "@
2590 p<maxmin_int>w\t{%2, %0|%0, %2}
2591 p<maxmin_int>w\t{%2, %0|%0, %2}
2592 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
2593 [(set_attr "isa" "*,sse2_noavx,avx")
2594 (set_attr "mmx_isa" "native,*,*")
2595 (set_attr "type" "mmxadd,sseiadd,sseiadd")
2596 (set_attr "mode" "DI,TI,TI")])
2597
2598 (define_expand "<code>v4hi3"
2599 [(set (match_operand:V4HI 0 "register_operand")
2600 (smaxmin:V4HI
2601 (match_operand:V4HI 1 "register_operand")
2602 (match_operand:V4HI 2 "register_operand")))]
2603 "TARGET_MMX_WITH_SSE")
2604
2605 (define_insn "<code><mode>3"
2606 [(set (match_operand:VI1_16_32 0 "register_operand" "=Yr,*x,Yv")
2607 (smaxmin:VI1_16_32
2608 (match_operand:VI1_16_32 1 "register_operand" "%0,0,Yv")
2609 (match_operand:VI1_16_32 2 "register_operand" "Yr,*x,Yv")))]
2610 "TARGET_SSE4_1"
2611 "@
2612 p<maxmin_int>b\t{%2, %0|%0, %2}
2613 p<maxmin_int>b\t{%2, %0|%0, %2}
2614 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
2615 [(set_attr "isa" "noavx,noavx,avx")
2616 (set_attr "type" "sseiadd")
2617 (set_attr "prefix_extra" "1")
2618 (set_attr "prefix" "orig,orig,vex")
2619 (set_attr "mode" "TI")])
2620
2621 (define_insn "<code>v2hi3"
2622 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
2623 (smaxmin:V2HI
2624 (match_operand:V2HI 1 "register_operand" "%0,Yw")
2625 (match_operand:V2HI 2 "register_operand" "x,Yw")))]
2626 "TARGET_SSE2"
2627 "@
2628 p<maxmin_int>w\t{%2, %0|%0, %2}
2629 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
2630 [(set_attr "isa" "noavx,avx")
2631 (set_attr "type" "sseiadd")
2632 (set_attr "mode" "TI")])
2633
2634 (define_insn "<code><mode>3"
2635 [(set (match_operand:MMXMODE24 0 "register_operand" "=Yr,*x,Yv")
2636 (umaxmin:MMXMODE24
2637 (match_operand:MMXMODE24 1 "register_operand" "%0,0,Yv")
2638 (match_operand:MMXMODE24 2 "register_operand" "Yr,*x,Yv")))]
2639 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
2640 "@
2641 p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2}
2642 p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2}
2643 vp<maxmin_int><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2644 [(set_attr "isa" "noavx,noavx,avx")
2645 (set_attr "type" "sseiadd")
2646 (set_attr "prefix_extra" "1")
2647 (set_attr "prefix" "orig,orig,vex")
2648 (set_attr "mode" "TI")])
2649
2650 (define_expand "mmx_<code>v8qi3"
2651 [(set (match_operand:V8QI 0 "register_operand")
2652 (umaxmin:V8QI
2653 (match_operand:V8QI 1 "register_mmxmem_operand")
2654 (match_operand:V8QI 2 "register_mmxmem_operand")))]
2655 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2656 && (TARGET_SSE || TARGET_3DNOW_A)"
2657 "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
2658
2659 (define_insn "*mmx_<code>v8qi3"
2660 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
2661 (umaxmin:V8QI
2662 (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")
2663 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))]
2664 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2665 && (TARGET_SSE || TARGET_3DNOW_A)
2666 && ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
2667 "@
2668 p<maxmin_int>b\t{%2, %0|%0, %2}
2669 p<maxmin_int>b\t{%2, %0|%0, %2}
2670 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
2671 [(set_attr "isa" "*,sse2_noavx,avx")
2672 (set_attr "mmx_isa" "native,*,*")
2673 (set_attr "type" "mmxadd,sseiadd,sseiadd")
2674 (set_attr "mode" "DI,TI,TI")])
2675
2676 (define_expand "<code>v8qi3"
2677 [(set (match_operand:V8QI 0 "register_operand")
2678 (umaxmin:V8QI
2679 (match_operand:V8QI 1 "register_operand")
2680 (match_operand:V8QI 2 "register_operand")))]
2681 "TARGET_MMX_WITH_SSE")
2682
2683 (define_insn "<code><mode>3"
2684 [(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw")
2685 (umaxmin:VI1_16_32
2686 (match_operand:VI1_16_32 1 "register_operand" "%0,Yw")
2687 (match_operand:VI1_16_32 2 "register_operand" "x,Yw")))]
2688 "TARGET_SSE2"
2689 "@
2690 p<maxmin_int>b\t{%2, %0|%0, %2}
2691 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
2692 [(set_attr "isa" "noavx,avx")
2693 (set_attr "type" "sseiadd")
2694 (set_attr "mode" "TI")])
2695
2696 (define_insn "<code>v2hi3"
2697 [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yv")
2698 (umaxmin:V2HI
2699 (match_operand:V2HI 1 "register_operand" "%0,0,Yv")
2700 (match_operand:V2HI 2 "register_operand" "Yr,*x,Yv")))]
2701 "TARGET_SSE4_1"
2702 "@
2703 p<maxmin_int>w\t{%2, %0|%0, %2}
2704 p<maxmin_int>w\t{%2, %0|%0, %2}
2705 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
2706 [(set_attr "isa" "noavx,noavx,avx")
2707 (set_attr "type" "sseiadd")
2708 (set_attr "prefix_extra" "1")
2709 (set_attr "prefix" "orig,orig,vex")
2710 (set_attr "mode" "TI")])
2711
2712 (define_insn "ssse3_abs<mode>2"
2713 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
2714 (abs:MMXMODEI
2715 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
2716 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
2717 "@
2718 pabs<mmxvecsize>\t{%1, %0|%0, %1}
2719 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
2720 [(set_attr "mmx_isa" "native,*")
2721 (set_attr "type" "sselog1")
2722 (set_attr "prefix_rep" "0")
2723 (set_attr "prefix_extra" "1")
2724 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
2725 (set_attr "mode" "DI,TI")])
2726
2727 (define_expand "abs<mode>2"
2728 [(set (match_operand:MMXMODEI 0 "register_operand")
2729 (abs:MMXMODEI
2730 (match_operand:MMXMODEI 1 "register_operand")))]
2731 "TARGET_SSSE3 && TARGET_MMX_WITH_SSE")
2732
2733 (define_insn "abs<mode>2"
2734 [(set (match_operand:VI_16_32 0 "register_operand" "=Yv")
2735 (abs:VI_16_32
2736 (match_operand:VI_16_32 1 "register_operand" "Yv")))]
2737 "TARGET_SSSE3"
2738 "%vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
2739 [(set_attr "type" "sselog1")
2740 (set_attr "prefix_rep" "0")
2741 (set_attr "prefix_extra" "1")
2742 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
2743 (set_attr "mode" "TI")])
2744
2745 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2746 ;;
2747 ;; Parallel integral shifts
2748 ;;
2749 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2750
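;; The shift count is a DImode operand that may be an immediate or a
;; register; all elements are shifted by the same count.  Arithmetic right
;; shifts exist only for V4HI/V2SI (MMXMODE24), while V1DI additionally
;; supports the logical shifts.  Per-element variable shift counts are
;; handled by the v<insn>v8qi3/v<insn>v4qi3 expanders further down.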
2751 (define_insn "mmx_ashr<mode>3"
2752 [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,<Yv_Yw>")
2753 (ashiftrt:MMXMODE24
2754 (match_operand:MMXMODE24 1 "register_operand" "0,0,<Yv_Yw>")
2755 (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
2756 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2757 "@
2758 psra<mmxvecsize>\t{%2, %0|%0, %2}
2759 psra<mmxvecsize>\t{%2, %0|%0, %2}
2760 vpsra<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2761 [(set_attr "isa" "*,sse2_noavx,avx")
2762 (set_attr "mmx_isa" "native,*,*")
2763 (set_attr "type" "mmxshft,sseishft,sseishft")
2764 (set (attr "length_immediate")
2765 (if_then_else (match_operand 2 "const_int_operand")
2766 (const_string "1")
2767 (const_string "0")))
2768 (set_attr "mode" "DI,TI,TI")])
2769
2770 (define_expand "ashr<mode>3"
2771 [(set (match_operand:MMXMODE24 0 "register_operand")
2772 (ashiftrt:MMXMODE24
2773 (match_operand:MMXMODE24 1 "register_operand")
2774 (match_operand:DI 2 "nonmemory_operand")))]
2775 "TARGET_MMX_WITH_SSE")
2776
2777 (define_insn "mmx_<insn><mode>3"
2778 [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,<Yv_Yw>")
2779 (any_lshift:MMXMODE248
2780 (match_operand:MMXMODE248 1 "register_operand" "0,0,<Yv_Yw>")
2781 (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
2782 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2783 "@
2784 p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
2785 p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
2786 vp<vshift><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2787 [(set_attr "isa" "*,sse2_noavx,avx")
2788 (set_attr "mmx_isa" "native,*,*")
2789 (set_attr "type" "mmxshft,sseishft,sseishft")
2790 (set (attr "length_immediate")
2791 (if_then_else (match_operand 2 "const_int_operand")
2792 (const_string "1")
2793 (const_string "0")))
2794 (set_attr "mode" "DI,TI,TI")])
2795
2796 (define_expand "<insn><mode>3"
2797 [(set (match_operand:MMXMODE24 0 "register_operand")
2798 (any_lshift:MMXMODE24
2799 (match_operand:MMXMODE24 1 "register_operand")
2800 (match_operand:DI 2 "nonmemory_operand")))]
2801 "TARGET_MMX_WITH_SSE")
2802
2803 (define_insn "mmx_<insn>v1si3"
2804 [(set (match_operand:V1SI 0 "register_operand" "=x,Yw")
2805 (any_lshift:V1SI
2806 (match_operand:V1SI 1 "register_operand" "0,Yw")
2807 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
2808 "TARGET_SSE2"
2809 "@
2810 p<vshift>d\t{%2, %0|%0, %2}
2811 vp<vshift>d\t{%2, %1, %0|%0, %1, %2}"
2812 [(set_attr "isa" "noavx,avx")
2813 (set_attr "type" "sseishft")
2814 (set (attr "length_immediate")
2815 (if_then_else (match_operand 2 "const_int_operand")
2816 (const_string "1")
2817 (const_string "0")))
2818 (set_attr "mode" "TI")])
2819
2820 (define_insn "<insn>v2hi3"
2821 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
2822 (any_shift:V2HI
2823 (match_operand:V2HI 1 "register_operand" "0,Yw")
2824 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
2825 "TARGET_SSE2"
2826 "@
2827 p<vshift>w\t{%2, %0|%0, %2}
2828 vp<vshift>w\t{%2, %1, %0|%0, %1, %2}"
2829 [(set_attr "isa" "noavx,avx")
2830 (set_attr "type" "sseishft")
2831 (set (attr "length_immediate")
2832 (if_then_else (match_operand 2 "const_int_operand")
2833 (const_string "1")
2834 (const_string "0")))
2835 (set_attr "mode" "TI")])
2836
2837 (define_expand "<insn>v8qi3"
2838 [(set (match_operand:V8QI 0 "register_operand")
2839 (any_shift:V8QI (match_operand:V8QI 1 "register_operand")
2840 (match_operand:DI 2 "nonmemory_operand")))]
2841 "TARGET_MMX_WITH_SSE"
2842 {
2843 ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
2844 operands[1], operands[2]);
2845 DONE;
2846 })
2847
2848 (define_expand "<insn>v4qi3"
2849 [(set (match_operand:V4QI 0 "register_operand")
2850 (any_shift:V4QI (match_operand:V4QI 1 "register_operand")
2851 (match_operand:DI 2 "nonmemory_operand")))]
2852 "TARGET_SSE2"
2853 {
2854 ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
2855 operands[1], operands[2]);
2856 DONE;
2857 })
2858
2859 (define_insn_and_split "<insn>v2qi3"
2860 [(set (match_operand:V2QI 0 "register_operand" "=Q")
2861 (any_shift:V2QI
2862 (match_operand:V2QI 1 "register_operand" "0")
2863 (match_operand:QI 2 "nonmemory_operand" "cI")))
2864 (clobber (reg:CC FLAGS_REG))]
2865 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
2866 "#"
2867 "&& reload_completed"
2868 [(parallel
2869 [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
2870 (subreg:HI
2871 (any_shift:QI
2872 (subreg:QI
2873 (zero_extract:HI (match_dup 4)
2874 (const_int 8)
2875 (const_int 8)) 0)
2876 (match_dup 2)) 0))
2877 (clobber (reg:CC FLAGS_REG))])
2878 (parallel
2879 [(set (strict_low_part (match_dup 0))
2880 (any_shift:QI (match_dup 1) (match_dup 2)))
2881 (clobber (reg:CC FLAGS_REG))])]
2882 {
2883 operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
2884 operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
2885 operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
2886 operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
2887 }
2888 [(set_attr "type" "multi")
2889 (set_attr "mode" "QI")])
2890
2891 (define_expand "v<insn>v8qi3"
2892 [(set (match_operand:V8QI 0 "register_operand")
2893 (any_shift:V8QI
2894 (match_operand:V8QI 1 "register_operand")
2895 (match_operand:V8QI 2 "register_operand")))]
2896 "TARGET_AVX512BW && TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
2897 {
2898 ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
2899 operands[1], operands[2]);
2900 DONE;
2901 })
2902
2903 (define_expand "v<insn>v4qi3"
2904 [(set (match_operand:V4QI 0 "register_operand")
2905 (any_shift:V4QI
2906 (match_operand:V4QI 1 "register_operand")
2907 (match_operand:V4QI 2 "register_operand")))]
2908 "TARGET_AVX512BW && TARGET_AVX512VL"
2909 {
2910 ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
2911 operands[1], operands[2]);
2912 DONE;
2913 })
2914
2915 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2916 ;;
2917 ;; Parallel integral comparisons
2918 ;;
2919 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2920
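;; Only equality and signed greater-than exist as MMX/SSE instructions
;; (pcmpeq*, pcmpgt*); the vec_cmp/vec_cmpu and vcond* expanders below
;; synthesize the remaining predicates via ix86_expand_int_vec_cmp and
;; ix86_expand_int_vcond.  XOP additionally provides the full set through
;; vpcom.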
2921 (define_expand "mmx_eq<mode>3"
2922 [(set (match_operand:MMXMODEI 0 "register_operand")
2923 (eq:MMXMODEI
2924 (match_operand:MMXMODEI 1 "register_mmxmem_operand")
2925 (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
2926 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2927 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
2928
2929 (define_insn "*mmx_eq<mode>3"
2930 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
2931 (eq:MMXMODEI
2932 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x")
2933 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
2934 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
2935 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2936 "@
2937 pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
2938 pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
2939 vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2940 [(set_attr "isa" "*,sse2_noavx,avx")
2941 (set_attr "mmx_isa" "native,*,*")
2942 (set_attr "type" "mmxcmp,ssecmp,ssecmp")
2943 (set_attr "mode" "DI,TI,TI")])
2944
2945 (define_insn "*eq<mode>3"
2946 [(set (match_operand:VI_16_32 0 "register_operand" "=x,x")
2947 (eq:VI_16_32
2948 (match_operand:VI_16_32 1 "register_operand" "%0,x")
2949 (match_operand:VI_16_32 2 "register_operand" "x,x")))]
2950 "TARGET_SSE2"
2951 "@
2952 pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
2953 vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2954 [(set_attr "isa" "noavx,avx")
2955 (set_attr "type" "ssecmp")
2956 (set_attr "mode" "TI")])
2957
2958 (define_insn "mmx_gt<mode>3"
2959 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
2960 (gt:MMXMODEI
2961 (match_operand:MMXMODEI 1 "register_operand" "0,0,x")
2962 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
2963 "TARGET_MMX || TARGET_MMX_WITH_SSE"
2964 "@
2965 pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
2966 pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
2967 vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2968 [(set_attr "isa" "*,sse2_noavx,avx")
2969 (set_attr "mmx_isa" "native,*,*")
2970 (set_attr "type" "mmxcmp,ssecmp,ssecmp")
2971 (set_attr "mode" "DI,TI,TI")])
2972
2973 (define_insn "*gt<mode>3"
2974 [(set (match_operand:VI_16_32 0 "register_operand" "=x,x")
2975 (gt:VI_16_32
2976 (match_operand:VI_16_32 1 "register_operand" "0,x")
2977 (match_operand:VI_16_32 2 "register_operand" "x,x")))]
2978 "TARGET_SSE2"
2979 "@
2980 pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
2981 vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
2982 [(set_attr "isa" "noavx,avx")
2983 (set_attr "type" "ssecmp")
2984 (set_attr "mode" "TI")])
2985
2986 (define_insn "*xop_maskcmp<mode>3"
2987 [(set (match_operand:MMXMODEI 0 "register_operand" "=x")
2988 (match_operator:MMXMODEI 1 "ix86_comparison_int_operator"
2989 [(match_operand:MMXMODEI 2 "register_operand" "x")
2990 (match_operand:MMXMODEI 3 "register_operand" "x")]))]
2991 "TARGET_XOP"
2992 "vpcom%Y1<mmxvecsize>\t{%3, %2, %0|%0, %2, %3}"
2993 [(set_attr "type" "sse4arg")
2994 (set_attr "mode" "TI")])
2995
2996 (define_insn "*xop_maskcmp<mode>3"
2997 [(set (match_operand:VI_16_32 0 "register_operand" "=x")
2998 (match_operator:VI_16_32 1 "ix86_comparison_int_operator"
2999 [(match_operand:VI_16_32 2 "register_operand" "x")
3000 (match_operand:VI_16_32 3 "register_operand" "x")]))]
3001 "TARGET_XOP"
3002 "vpcom%Y1<mmxvecsize>\t{%3, %2, %0|%0, %2, %3}"
3003 [(set_attr "type" "sse4arg")
3004 (set_attr "mode" "TI")])
3005
3006 (define_insn "*xop_maskcmp_uns<mode>3"
3007 [(set (match_operand:MMXMODEI 0 "register_operand" "=x")
3008 (match_operator:MMXMODEI 1 "ix86_comparison_uns_operator"
3009 [(match_operand:MMXMODEI 2 "register_operand" "x")
3010 (match_operand:MMXMODEI 3 "register_operand" "x")]))]
3011 "TARGET_XOP"
3012 "vpcom%Y1u<mmxvecsize>\t{%3, %2, %0|%0, %2, %3}"
3013 [(set_attr "type" "sse4arg")
3014 (set_attr "mode" "TI")])
3015
3016 (define_insn "*xop_maskcmp_uns<mode>3"
3017 [(set (match_operand:VI_16_32 0 "register_operand" "=x")
3018 (match_operator:VI_16_32 1 "ix86_comparison_uns_operator"
3019 [(match_operand:VI_16_32 2 "register_operand" "x")
3020 (match_operand:VI_16_32 3 "register_operand" "x")]))]
3021 "TARGET_XOP"
3022 "vpcom%Y1u<mmxvecsize>\t{%3, %2, %0|%0, %2, %3}"
3023 [(set_attr "type" "sse4arg")
3024 (set_attr "mode" "TI")])
3025
3026 (define_expand "vec_cmp<mode><mode>"
3027 [(set (match_operand:MMXMODEI 0 "register_operand")
3028 (match_operator:MMXMODEI 1 ""
3029 [(match_operand:MMXMODEI 2 "register_operand")
3030 (match_operand:MMXMODEI 3 "register_operand")]))]
3031 "TARGET_MMX_WITH_SSE"
3032 {
3033 bool ok = ix86_expand_int_vec_cmp (operands);
3034 gcc_assert (ok);
3035 DONE;
3036 })
3037
3038 (define_expand "vec_cmp<mode><mode>"
3039 [(set (match_operand:VI_16_32 0 "register_operand")
3040 (match_operator:VI_16_32 1 ""
3041 [(match_operand:VI_16_32 2 "register_operand")
3042 (match_operand:VI_16_32 3 "register_operand")]))]
3043 "TARGET_SSE2"
3044 {
3045 bool ok = ix86_expand_int_vec_cmp (operands);
3046 gcc_assert (ok);
3047 DONE;
3048 })
3049
3050 (define_expand "vec_cmpu<mode><mode>"
3051 [(set (match_operand:MMXMODEI 0 "register_operand")
3052 (match_operator:MMXMODEI 1 ""
3053 [(match_operand:MMXMODEI 2 "register_operand")
3054 (match_operand:MMXMODEI 3 "register_operand")]))]
3055 "TARGET_MMX_WITH_SSE"
3056 {
3057 bool ok = ix86_expand_int_vec_cmp (operands);
3058 gcc_assert (ok);
3059 DONE;
3060 })
3061
3062 (define_expand "vec_cmpu<mode><mode>"
3063 [(set (match_operand:VI_16_32 0 "register_operand")
3064 (match_operator:VI_16_32 1 ""
3065 [(match_operand:VI_16_32 2 "register_operand")
3066 (match_operand:VI_16_32 3 "register_operand")]))]
3067 "TARGET_SSE2"
3068 {
3069 bool ok = ix86_expand_int_vec_cmp (operands);
3070 gcc_assert (ok);
3071 DONE;
3072 })
3073
3074 (define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
3075 [(set (match_operand:MMXMODE124 0 "register_operand")
3076 (if_then_else:MMXMODE124
3077 (match_operator 3 ""
3078 [(match_operand:MMXMODEI 4 "register_operand")
3079 (match_operand:MMXMODEI 5 "register_operand")])
3080 (match_operand:MMXMODE124 1)
3081 (match_operand:MMXMODE124 2)))]
3082 "TARGET_MMX_WITH_SSE
3083 && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
3084 == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
3085 {
3086 bool ok = ix86_expand_int_vcond (operands);
3087 gcc_assert (ok);
3088 DONE;
3089 })
3090
3091 (define_expand "vcond<mode><mode>"
3092 [(set (match_operand:VI_16_32 0 "register_operand")
3093 (if_then_else:VI_16_32
3094 (match_operator 3 ""
3095 [(match_operand:VI_16_32 4 "register_operand")
3096 (match_operand:VI_16_32 5 "register_operand")])
3097 (match_operand:VI_16_32 1)
3098 (match_operand:VI_16_32 2)))]
3099 "TARGET_SSE2"
3100 {
3101 bool ok = ix86_expand_int_vcond (operands);
3102 gcc_assert (ok);
3103 DONE;
3104 })
3105
3106 (define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
3107 [(set (match_operand:MMXMODE124 0 "register_operand")
3108 (if_then_else:MMXMODE124
3109 (match_operator 3 ""
3110 [(match_operand:MMXMODEI 4 "register_operand")
3111 (match_operand:MMXMODEI 5 "register_operand")])
3112 (match_operand:MMXMODE124 1)
3113 (match_operand:MMXMODE124 2)))]
3114 "TARGET_MMX_WITH_SSE
3115 && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
3116 == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
3117 {
3118 bool ok = ix86_expand_int_vcond (operands);
3119 gcc_assert (ok);
3120 DONE;
3121 })
3122
3123 (define_expand "vcondu<mode><mode>"
3124 [(set (match_operand:VI_16_32 0 "register_operand")
3125 (if_then_else:VI_16_32
3126 (match_operator 3 ""
3127 [(match_operand:VI_16_32 4 "register_operand")
3128 (match_operand:VI_16_32 5 "register_operand")])
3129 (match_operand:VI_16_32 1)
3130 (match_operand:VI_16_32 2)))]
3131 "TARGET_SSE2"
3132 {
3133 bool ok = ix86_expand_int_vcond (operands);
3134 gcc_assert (ok);
3135 DONE;
3136 })
3137
3138 (define_expand "vcond_mask_<mode><mmxintvecmodelower>"
3139 [(set (match_operand:MMXMODE124 0 "register_operand")
3140 (vec_merge:MMXMODE124
3141 (match_operand:MMXMODE124 1 "register_operand")
3142 (match_operand:MMXMODE124 2 "register_operand")
3143 (match_operand:<mmxintvecmode> 3 "register_operand")))]
3144 "TARGET_MMX_WITH_SSE"
3145 {
3146 ix86_expand_sse_movcc (operands[0], operands[3],
3147 operands[1], operands[2]);
3148 DONE;
3149 })
3150
3151 (define_expand "vcond_mask_<mode><mode>"
3152 [(set (match_operand:VI_16_32 0 "register_operand")
3153 (vec_merge:VI_16_32
3154 (match_operand:VI_16_32 1 "register_operand")
3155 (match_operand:VI_16_32 2 "register_operand")
3156 (match_operand:VI_16_32 3 "register_operand")))]
3157 "TARGET_SSE2"
3158 {
3159 ix86_expand_sse_movcc (operands[0], operands[3],
3160 operands[1], operands[2]);
3161 DONE;
3162 })
3163
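;; The pblendvb patterns provide the variable blend used when
;; ix86_expand_sse_movcc implements the vcond_mask expanders above: each
;; destination byte comes from operand 2 if the most significant bit of
;; the corresponding mask byte is set, and from operand 1 otherwise.  In
;; the non-AVX alternatives the mask is implicitly xmm0 (constraint Yz).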
3164 (define_insn "mmx_pblendvb_v8qi"
3165 [(set (match_operand:V8QI 0 "register_operand" "=Yr,*x,x")
3166 (unspec:V8QI
3167 [(match_operand:V8QI 1 "register_operand" "0,0,x")
3168 (match_operand:V8QI 2 "register_operand" "Yr,*x,x")
3169 (match_operand:V8QI 3 "register_operand" "Yz,Yz,x")]
3170 UNSPEC_BLENDV))]
3171 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3172 "@
3173 pblendvb\t{%3, %2, %0|%0, %2, %3}
3174 pblendvb\t{%3, %2, %0|%0, %2, %3}
3175 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3176 [(set_attr "isa" "noavx,noavx,avx")
3177 (set_attr "type" "ssemov")
3178 (set_attr "prefix_extra" "1")
3179 (set_attr "length_immediate" "1")
3180 (set_attr "prefix" "orig,orig,vex")
3181 (set_attr "btver2_decode" "vector")
3182 (set_attr "mode" "TI")])
3183
3184 (define_insn "mmx_pblendvb_<mode>"
3185 [(set (match_operand:VI_16_32 0 "register_operand" "=Yr,*x,x")
3186 (unspec:VI_16_32
3187 [(match_operand:VI_16_32 1 "register_operand" "0,0,x")
3188 (match_operand:VI_16_32 2 "register_operand" "Yr,*x,x")
3189 (match_operand:VI_16_32 3 "register_operand" "Yz,Yz,x")]
3190 UNSPEC_BLENDV))]
3191 "TARGET_SSE4_1"
3192 "@
3193 pblendvb\t{%3, %2, %0|%0, %2, %3}
3194 pblendvb\t{%3, %2, %0|%0, %2, %3}
3195 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3196 [(set_attr "isa" "noavx,noavx,avx")
3197 (set_attr "type" "ssemov")
3198 (set_attr "prefix_extra" "1")
3199 (set_attr "length_immediate" "1")
3200 (set_attr "prefix" "orig,orig,vex")
3201 (set_attr "btver2_decode" "vector")
3202 (set_attr "mode" "TI")])
3203
3204 ;; XOP parallel XMM conditional moves
3205 (define_insn "*xop_pcmov_<mode>"
3206 [(set (match_operand:MMXMODE124 0 "register_operand" "=x")
3207 (if_then_else:MMXMODE124
3208 (match_operand:MMXMODE124 3 "register_operand" "x")
3209 (match_operand:MMXMODE124 1 "register_operand" "x")
3210 (match_operand:MMXMODE124 2 "register_operand" "x")))]
3211 "TARGET_XOP && TARGET_MMX_WITH_SSE"
3212 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3213 [(set_attr "type" "sse4arg")
3214 (set_attr "mode" "TI")])
3215
3216 (define_insn "*xop_pcmov_<mode>"
3217 [(set (match_operand:VI_16_32 0 "register_operand" "=x")
3218 (if_then_else:VI_16_32
3219 (match_operand:VI_16_32 3 "register_operand" "x")
3220 (match_operand:VI_16_32 1 "register_operand" "x")
3221 (match_operand:VI_16_32 2 "register_operand" "x")))]
3222 "TARGET_XOP"
3223 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3224 [(set_attr "type" "sse4arg")
3225 (set_attr "mode" "TI")])
3226
3227 ;; XOP permute instructions
3228 (define_insn "mmx_ppermv64"
3229 [(set (match_operand:V8QI 0 "register_operand" "=x")
3230 (unspec:V8QI
3231 [(match_operand:V8QI 1 "register_operand" "x")
3232 (match_operand:V8QI 2 "register_operand" "x")
3233 (match_operand:V16QI 3 "nonimmediate_operand" "xm")]
3234 UNSPEC_XOP_PERMUTE))]
3235 "TARGET_XOP && TARGET_MMX_WITH_SSE"
3236 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3237 [(set_attr "type" "sse4arg")
3238 (set_attr "mode" "TI")])
3239
3240 (define_insn "mmx_ppermv32"
3241 [(set (match_operand:V4QI 0 "register_operand" "=x")
3242 (unspec:V4QI
3243 [(match_operand:V4QI 1 "register_operand" "x")
3244 (match_operand:V4QI 2 "register_operand" "x")
3245 (match_operand:V16QI 3 "nonimmediate_operand" "xm")]
3246 UNSPEC_XOP_PERMUTE))]
3247 "TARGET_XOP"
3248 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3249 [(set_attr "type" "sse4arg")
3250 (set_attr "mode" "TI")])
3251
3252 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3253 ;;
3254 ;; Parallel integral logical operations
3255 ;;
3256 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3257
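;; There is no vector NOT instruction, so one_cmpl is open-coded as an XOR
;; with an all-ones constant.  And-not maps to pandn/vpandn (or, in
;; general registers, to BMI andn when available and a not/and pair
;; otherwise).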
3258 (define_expand "one_cmpl<mode>2"
3259 [(set (match_operand:MMXMODEI 0 "register_operand")
3260 (xor:MMXMODEI
3261 (match_operand:MMXMODEI 1 "register_operand")
3262 (match_dup 2)))]
3263 "TARGET_MMX_WITH_SSE"
3264 "operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
3265
3266 (define_insn "one_cmpl<mode>2"
3267 [(set (match_operand:VI_16_32 0 "register_operand" "=?r,&x,&v")
3268 (not:VI_16_32
3269 (match_operand:VI_16_32 1 "register_operand" "0,x,v")))]
3270 ""
3271 "#"
3272 [(set_attr "isa" "*,sse2,avx512vl")
3273 (set_attr "type" "negnot,sselog1,sselog1")
3274 (set_attr "mode" "SI,TI,TI")])
3275
3276 (define_split
3277 [(set (match_operand:VI_16_32 0 "general_reg_operand")
3278 (not:VI_16_32
3279 (match_operand:VI_16_32 1 "general_reg_operand")))]
3280 "reload_completed"
3281 [(set (match_dup 0)
3282 (not:SI (match_dup 1)))]
3283 {
3284 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
3285 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
3286 })
3287
3288 (define_split
3289 [(set (match_operand:VI_16_32 0 "sse_reg_operand")
3290 (not:VI_16_32
3291 (match_operand:VI_16_32 1 "sse_reg_operand")))]
3292 "TARGET_SSE2 && reload_completed"
3293 [(set (match_dup 0) (match_dup 2))
3294 (set (match_dup 0)
3295 (xor:V16QI
3296 (match_dup 0) (match_dup 1)))]
3297 {
3298 operands[2] = CONSTM1_RTX (V16QImode);
3299 operands[1] = lowpart_subreg (V16QImode, operands[1], <MODE>mode);
3300 operands[0] = lowpart_subreg (V16QImode, operands[0], <MODE>mode);
3301 })
3302
3303 (define_insn "mmx_andnot<mode>3"
3304 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
3305 (and:MMXMODEI
3306 (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,x,v"))
3307 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
3308 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3309 "@
3310 pandn\t{%2, %0|%0, %2}
3311 pandn\t{%2, %0|%0, %2}
3312 vpandn\t{%2, %1, %0|%0, %1, %2}
3313 vpandnd\t{%2, %1, %0|%0, %1, %2}"
3314 [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
3315 (set_attr "mmx_isa" "native,*,*,*")
3316 (set_attr "type" "mmxadd,sselog,sselog,sselog")
3317 (set_attr "mode" "DI,TI,TI,TI")])
3318
3319 (define_insn "*andnot<mode>3"
3320 [(set (match_operand:VI_16_32 0 "register_operand" "=?&r,?r,x,x,v")
3321 (and:VI_16_32
3322 (not:VI_16_32
3323 (match_operand:VI_16_32 1 "register_operand" "0,r,0,x,v"))
3324 (match_operand:VI_16_32 2 "register_operand" "r,r,x,x,v")))
3325 (clobber (reg:CC FLAGS_REG))]
3326 ""
3327 "#"
3328 [(set_attr "isa" "*,bmi,sse2_noavx,avx,avx512vl")
3329 (set_attr "type" "alu,bitmanip,sselog,sselog,sselog")
3330 (set_attr "mode" "SI,SI,TI,TI,TI")])
3331
3332 (define_split
3333 [(set (match_operand:VI_16_32 0 "general_reg_operand")
3334 (and:VI_16_32
3335 (not:VI_16_32 (match_operand:VI_16_32 1 "general_reg_operand"))
3336 (match_operand:VI_16_32 2 "general_reg_operand")))
3337 (clobber (reg:CC FLAGS_REG))]
3338 "TARGET_BMI && reload_completed"
3339 [(parallel
3340 [(set (match_dup 0)
3341 (and:SI (not:SI (match_dup 1)) (match_dup 2)))
3342 (clobber (reg:CC FLAGS_REG))])]
3343 {
3344 operands[2] = lowpart_subreg (SImode, operands[2], <MODE>mode);
3345 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
3346 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
3347 })
3348
3349 (define_split
3350 [(set (match_operand:VI_16_32 0 "general_reg_operand")
3351 (and:VI_16_32
3352 (not:VI_16_32 (match_operand:VI_16_32 1 "general_reg_operand"))
3353 (match_operand:VI_16_32 2 "general_reg_operand")))
3354 (clobber (reg:CC FLAGS_REG))]
3355 "!TARGET_BMI && reload_completed"
3356 [(set (match_dup 0)
3357 (not:SI (match_dup 1)))
3358 (parallel
3359 [(set (match_dup 0)
3360 (and:SI (match_dup 0) (match_dup 2)))
3361 (clobber (reg:CC FLAGS_REG))])]
3362 {
3363 operands[2] = lowpart_subreg (SImode, operands[2], <MODE>mode);
3364 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
3365 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
3366 })
3367
3368 (define_split
3369 [(set (match_operand:VI_16_32 0 "sse_reg_operand")
3370 (and:VI_16_32
3371 (not:VI_16_32 (match_operand:VI_16_32 1 "sse_reg_operand"))
3372 (match_operand:VI_16_32 2 "sse_reg_operand")))
3373 (clobber (reg:CC FLAGS_REG))]
3374 "TARGET_SSE2 && reload_completed"
3375 [(set (match_dup 0)
3376 (and:V16QI (not:V16QI (match_dup 1)) (match_dup 2)))]
3377 {
3378 operands[2] = lowpart_subreg (V16QImode, operands[2], <MODE>mode);
3379 operands[1] = lowpart_subreg (V16QImode, operands[1], <MODE>mode);
3380 operands[0] = lowpart_subreg (V16QImode, operands[0], <MODE>mode);
3381 })
3382
3383 (define_expand "mmx_<code><mode>3"
3384 [(set (match_operand:MMXMODEI 0 "register_operand")
3385 (any_logic:MMXMODEI
3386 (match_operand:MMXMODEI 1 "register_mmxmem_operand")
3387 (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
3388 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3389 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3390
3391 (define_expand "<code><mode>3"
3392 [(set (match_operand:MMXMODEI 0 "register_operand")
3393 (any_logic:MMXMODEI
3394 (match_operand:MMXMODEI 1 "register_operand")
3395 (match_operand:MMXMODEI 2 "register_operand")))]
3396 "TARGET_MMX_WITH_SSE")
3397
3398 (define_insn "*mmx_<code><mode>3"
3399 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
3400 (any_logic:MMXMODEI
3401 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x,v")
3402 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
3403 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3404 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3405 "@
3406 p<logic>\t{%2, %0|%0, %2}
3407 p<logic>\t{%2, %0|%0, %2}
3408 vp<logic>\t{%2, %1, %0|%0, %1, %2}
3409 vp<logic>d\t{%2, %1, %0|%0, %1, %2}"
3410 [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
3411 (set_attr "mmx_isa" "native,*,*,*")
3412 (set_attr "type" "mmxadd,sselog,sselog,sselog")
3413 (set_attr "mode" "DI,TI,TI,TI")])
3414
3415 (define_expand "<code><mode>3"
3416 [(set (match_operand:VI_16_32 0 "nonimmediate_operand")
3417 (any_logic:VI_16_32
3418 (match_operand:VI_16_32 1 "nonimmediate_operand")
3419 (match_operand:VI_16_32 2 "nonimmediate_or_x86_64_const_vector_operand")))]
3420 ""
3421 "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
3422
3423 (define_insn "*<code><mode>3"
3424 [(set (match_operand:VI_16_32 0 "nonimmediate_operand" "=?r,m,x,x,v")
3425 (any_logic:VI_16_32
3426 (match_operand:VI_16_32 1 "nonimmediate_operand" "%0,0,0,x,v")
3427 (match_operand:VI_16_32 2 "nonimmediate_or_x86_64_const_vector_operand" "r,i,x,x,v")))
3428 (clobber (reg:CC FLAGS_REG))]
3429 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3430 "#"
3431 [(set_attr "isa" "*,*,sse2_noavx,avx,avx512vl")
3432 (set_attr "type" "alu,alu,sselog,sselog,sselog")
3433 (set_attr "mode" "SI,SI,TI,TI,TI")])
3434
3435 (define_split
3436 [(set (match_operand:VI_16_32 0 "nonimmediate_gr_operand")
3437 (any_logic:VI_16_32
3438 (match_operand:VI_16_32 1 "nonimmediate_gr_operand")
3439 (match_operand:VI_16_32 2 "reg_or_const_vector_operand")))
3440 (clobber (reg:CC FLAGS_REG))]
3441 "reload_completed"
3442 [(parallel
3443 [(set (match_dup 0)
3444 (any_logic:<mmxinsnmode> (match_dup 1) (match_dup 2)))
3445 (clobber (reg:CC FLAGS_REG))])]
3446 {
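  /* If operand 2 is a constant vector, reinterpret its bit pattern as the
     equivalent scalar immediate; e.g. a V4QI constant { 1, 2, 3, 4 }
     corresponds to the SImode immediate 0x04030201 (element 0 in the low
     byte on this little-endian target).  */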
3447 if (!register_operand (operands[2], <MODE>mode))
3448 {
3449 HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[2],
3450 <MODE>mode);
3451 operands[2] = GEN_INT (val);
3452 }
3453 else
3454 operands[2] = lowpart_subreg (<mmxinsnmode>mode, operands[2], <MODE>mode);
3455 operands[1] = lowpart_subreg (<mmxinsnmode>mode, operands[1], <MODE>mode);
3456 operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
3457 })
3458
3459 (define_split
3460 [(set (match_operand:VI_16_32 0 "sse_reg_operand")
3461 (any_logic:VI_16_32
3462 (match_operand:VI_16_32 1 "sse_reg_operand")
3463 (match_operand:VI_16_32 2 "sse_reg_operand")))
3464 (clobber (reg:CC FLAGS_REG))]
3465 "TARGET_SSE2 && reload_completed"
3466 [(set (match_dup 0)
3467 (any_logic:V16QI (match_dup 1) (match_dup 2)))]
3468 {
3469 operands[2] = lowpart_subreg (V16QImode, operands[2], <MODE>mode);
3470 operands[1] = lowpart_subreg (V16QImode, operands[1], <MODE>mode);
3471 operands[0] = lowpart_subreg (V16QImode, operands[0], <MODE>mode);
3472 })
3473
3474 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3475 ;;
3476 ;; Parallel integral element swizzling
3477 ;;
3478 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3479
3480 (define_insn_and_split "mmx_packsswb"
3481 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
3482 (vec_concat:V8QI
3483 (ss_truncate:V4QI
3484 (match_operand:V4HI 1 "register_operand" "0,0,Yw"))
3485 (ss_truncate:V4QI
3486 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
3487 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3488 "@
3489 packsswb\t{%2, %0|%0, %2}
3490 #
3491 #"
3492 "&& reload_completed
3493 && SSE_REGNO_P (REGNO (operands[0]))"
3494 [(const_int 0)]
3495 "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
3496 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3497 (set_attr "type" "mmxshft,sselog,sselog")
3498 (set_attr "mode" "DI,TI,TI")])
3499
3500 ;; This instruction does unsigned saturation of a signed source and is
3501 ;; therefore different from the generic us_truncate RTX.
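;; Illustration only (not part of the machine description): per element the
;; operation clamps a signed 16-bit value to the unsigned 8-bit range, as in
;; this hypothetical C helper:
;;
;;   unsigned char pack_us (short x)
;;   {
;;     return x < 0 ? 0 : x > 255 ? 255 : (unsigned char) x;
;;   }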
3502 (define_insn_and_split "mmx_packuswb"
3503 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
3504 (unspec:V8QI
3505 [(match_operand:V4HI 1 "register_operand" "0,0,Yw")
3506 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")]
3507 UNSPEC_US_TRUNCATE))]
3508 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3509 "@
3510 packuswb\t{%2, %0|%0, %2}
3511 #
3512 #"
3513 "&& reload_completed
3514 && SSE_REGNO_P (REGNO (operands[0]))"
3515 [(const_int 0)]
3516 "ix86_split_mmx_pack (operands, US_TRUNCATE); DONE;"
3517 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3518 (set_attr "type" "mmxshft,sselog,sselog")
3519 (set_attr "mode" "DI,TI,TI")])
3520
3521 (define_insn_and_split "mmx_packssdw"
3522 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
3523 (vec_concat:V4HI
3524 (ss_truncate:V2HI
3525 (match_operand:V2SI 1 "register_operand" "0,0,Yw"))
3526 (ss_truncate:V2HI
3527 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
3528 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3529 "@
3530 packssdw\t{%2, %0|%0, %2}
3531 #
3532 #"
3533 "&& reload_completed
3534 && SSE_REGNO_P (REGNO (operands[0]))"
3535 [(const_int 0)]
3536 "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
3537 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3538 (set_attr "type" "mmxshft,sselog,sselog")
3539 (set_attr "mode" "DI,TI,TI")])
3540
3541 (define_insn_and_split "mmx_packusdw"
3542 [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw")
3543 (unspec:V4HI
3544 [(match_operand:V2SI 1 "register_operand" "0,0,Yw")
3545 (match_operand:V2SI 2 "register_operand" "Yr,*x,Yw")]
3546 UNSPEC_US_TRUNCATE))]
3547 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3548 "#"
3549 "&& reload_completed"
3550 [(const_int 0)]
3551 "ix86_split_mmx_pack (operands, US_TRUNCATE); DONE;"
3552 [(set_attr "isa" "noavx,noavx,avx")
3553 (set_attr "type" "sselog")
3554 (set_attr "mode" "TI")])
3555
3556 (define_insn_and_split "mmx_punpckhbw"
3557 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
3558 (vec_select:V8QI
3559 (vec_concat:V16QI
3560 (match_operand:V8QI 1 "register_operand" "0,0,Yw")
3561 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
3562 (parallel [(const_int 4) (const_int 12)
3563 (const_int 5) (const_int 13)
3564 (const_int 6) (const_int 14)
3565 (const_int 7) (const_int 15)])))]
3566 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3567 "@
3568 punpckhbw\t{%2, %0|%0, %2}
3569 #
3570 #"
3571 "&& reload_completed
3572 && SSE_REGNO_P (REGNO (operands[0]))"
3573 [(const_int 0)]
3574 "ix86_split_mmx_punpck (operands, true); DONE;"
3575 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3576 (set_attr "type" "mmxcvt,sselog,sselog")
3577 (set_attr "mode" "DI,TI,TI")])
3578
3579 (define_insn_and_split "mmx_punpckhbw_low"
3580 [(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
3581 (vec_select:V4QI
3582 (vec_concat:V8QI
3583 (match_operand:V4QI 1 "register_operand" "0,Yw")
3584 (match_operand:V4QI 2 "register_operand" "x,Yw"))
3585 (parallel [(const_int 2) (const_int 6)
3586 (const_int 3) (const_int 7)])))]
3587 "TARGET_SSE2"
3588 "#"
3589 "&& reload_completed"
3590 [(const_int 0)]
3591 "ix86_split_mmx_punpck (operands, true); DONE;"
3592 [(set_attr "isa" "noavx,avx")
3593 (set_attr "type" "sselog")
3594 (set_attr "mode" "TI")])
3595
3596 (define_insn_and_split "mmx_punpcklbw"
3597 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
3598 (vec_select:V8QI
3599 (vec_concat:V16QI
3600 (match_operand:V8QI 1 "register_operand" "0,0,Yw")
3601 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
3602 (parallel [(const_int 0) (const_int 8)
3603 (const_int 1) (const_int 9)
3604 (const_int 2) (const_int 10)
3605 (const_int 3) (const_int 11)])))]
3606 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3607 "@
3608 punpcklbw\t{%2, %0|%0, %k2}
3609 #
3610 #"
3611 "&& reload_completed
3612 && SSE_REGNO_P (REGNO (operands[0]))"
3613 [(const_int 0)]
3614 "ix86_split_mmx_punpck (operands, false); DONE;"
3615 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3616 (set_attr "type" "mmxcvt,sselog,sselog")
3617 (set_attr "mode" "DI,TI,TI")])
3618
3619 (define_insn_and_split "mmx_punpcklbw_low"
3620 [(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
3621 (vec_select:V4QI
3622 (vec_concat:V8QI
3623 (match_operand:V4QI 1 "register_operand" "0,Yw")
3624 (match_operand:V4QI 2 "register_operand" "x,Yw"))
3625 (parallel [(const_int 0) (const_int 4)
3626 (const_int 1) (const_int 5)])))]
3627 "TARGET_SSE2"
3628 "#"
3629 "&& reload_completed"
3630 [(const_int 0)]
3631 "ix86_split_mmx_punpck (operands, false); DONE;"
3632 [(set_attr "isa" "noavx,avx")
3633 (set_attr "type" "sselog")
3634 (set_attr "mode" "TI")])
3635
3636 (define_insn_and_split "mmx_punpckhwd"
3637 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
3638 (vec_select:V4HI
3639 (vec_concat:V8HI
3640 (match_operand:V4HI 1 "register_operand" "0,0,Yw")
3641 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
3642 (parallel [(const_int 2) (const_int 6)
3643 (const_int 3) (const_int 7)])))]
3644 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3645 "@
3646 punpckhwd\t{%2, %0|%0, %2}
3647 #
3648 #"
3649 "&& reload_completed
3650 && SSE_REGNO_P (REGNO (operands[0]))"
3651 [(const_int 0)]
3652 "ix86_split_mmx_punpck (operands, true); DONE;"
3653 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3654 (set_attr "type" "mmxcvt,sselog,sselog")
3655 (set_attr "mode" "DI,TI,TI")])
3656
3657 (define_insn_and_split "mmx_punpcklwd"
3658 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
3659 (vec_select:V4HI
3660 (vec_concat:V8HI
3661 (match_operand:V4HI 1 "register_operand" "0,0,Yw")
3662 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
3663 (parallel [(const_int 0) (const_int 4)
3664 (const_int 1) (const_int 5)])))]
3665 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3666 "@
3667 punpcklwd\t{%2, %0|%0, %k2}
3668 #
3669 #"
3670 "&& reload_completed
3671 && SSE_REGNO_P (REGNO (operands[0]))"
3672 [(const_int 0)]
3673 "ix86_split_mmx_punpck (operands, false); DONE;"
3674 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3675 (set_attr "type" "mmxcvt,sselog,sselog")
3676 (set_attr "mode" "DI,TI,TI")])
3677
3678 (define_insn_and_split "mmx_punpckhdq"
3679 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
3680 (vec_select:V2SI
3681 (vec_concat:V4SI
3682 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
3683 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
3684 (parallel [(const_int 1)
3685 (const_int 3)])))]
3686 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3687 "@
3688 punpckhdq\t{%2, %0|%0, %2}
3689 #
3690 #"
3691 "&& reload_completed
3692 && SSE_REGNO_P (REGNO (operands[0]))"
3693 [(const_int 0)]
3694 "ix86_split_mmx_punpck (operands, true); DONE;"
3695 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3696 (set_attr "type" "mmxcvt,sselog,sselog")
3697 (set_attr "mode" "DI,TI,TI")])
3698
3699 (define_insn_and_split "mmx_punpckldq"
3700 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
3701 (vec_select:V2SI
3702 (vec_concat:V4SI
3703 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
3704 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
3705 (parallel [(const_int 0)
3706 (const_int 2)])))]
3707 "TARGET_MMX || TARGET_MMX_WITH_SSE"
3708 "@
3709 punpckldq\t{%2, %0|%0, %k2}
3710 #
3711 #"
3712 "&& reload_completed
3713 && SSE_REGNO_P (REGNO (operands[0]))"
3714 [(const_int 0)]
3715 "ix86_split_mmx_punpck (operands, false); DONE;"
3716 [(set_attr "mmx_isa" "native,sse_noavx,avx")
3717 (set_attr "type" "mmxcvt,sselog,sselog")
3718 (set_attr "mode" "DI,TI,TI")])
3719
3720 (define_insn "sse4_1_<code>v4qiv4hi2"
3721 [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw")
3722 (any_extend:V4HI
3723 (vec_select:V4QI
3724 (match_operand:V8QI 1 "register_operand" "Yr,*x,Yw")
3725 (parallel [(const_int 0) (const_int 1)
3726 (const_int 2) (const_int 3)]))))]
3727 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3728 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
3729 [(set_attr "isa" "noavx,noavx,avx")
3730 (set_attr "type" "ssemov")
3731 (set_attr "prefix_extra" "1")
3732 (set_attr "prefix" "orig,orig,maybe_evex")
3733 (set_attr "mode" "TI")])
3734
3735 (define_expand "<insn>v4qiv4hi2"
3736 [(set (match_operand:V4HI 0 "register_operand")
3737 (any_extend:V4HI
3738 (match_operand:V4QI 1 "register_operand")))]
3739 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3740 {
3741 rtx op1 = force_reg (V4QImode, operands[1]);
3742 op1 = lowpart_subreg (V8QImode, op1, V4QImode);
3743 emit_insn (gen_sse4_1_<code>v4qiv4hi2 (operands[0], op1));
3744 DONE;
3745 })
3746
3747 (define_insn "sse4_1_<code>v2hiv2si2"
3748 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
3749 (any_extend:V2SI
3750 (vec_select:V2HI
3751 (match_operand:V4HI 1 "register_operand" "Yr,*x,v")
3752 (parallel [(const_int 0) (const_int 1)]))))]
3753 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3754 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
3755 [(set_attr "isa" "noavx,noavx,avx")
3756 (set_attr "type" "ssemov")
3757 (set_attr "prefix_extra" "1")
3758 (set_attr "prefix" "orig,orig,maybe_evex")
3759 (set_attr "mode" "TI")])
3760
3761 (define_expand "<insn>v2hiv2si2"
3762 [(set (match_operand:V2SI 0 "register_operand")
3763 (any_extend:V2SI
3764 (match_operand:V2HI 1 "register_operand")))]
3765 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3766 {
3767 rtx op1 = force_reg (V2HImode, operands[1]);
3768 op1 = lowpart_subreg (V4HImode, op1, V2HImode);
3769 emit_insn (gen_sse4_1_<code>v2hiv2si2 (operands[0], op1));
3770 DONE;
3771 })
3772
3773 (define_insn "sse4_1_<code>v2qiv2si2"
3774 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
3775 (any_extend:V2SI
3776 (vec_select:V2QI
3777 (match_operand:V4QI 1 "register_operand" "Yr,*x,v")
3778 (parallel [(const_int 0) (const_int 1)]))))]
3779 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3780 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
3781 [(set_attr "isa" "noavx,noavx,avx")
3782 (set_attr "type" "ssemov")
3783 (set_attr "prefix_extra" "1")
3784 (set_attr "prefix" "orig,orig,maybe_evex")
3785 (set_attr "mode" "TI")])
3786
3787 (define_expand "<insn>v2qiv2si2"
3788 [(set (match_operand:V2SI 0 "register_operand")
3789 (any_extend:V2SI
3790 (match_operand:V2QI 1 "register_operand")))]
3791 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
3792 {
3793 rtx op1 = force_reg (V2QImode, operands[1]);
3794 op1 = lowpart_subreg (V4QImode, op1, V2QImode);
3795 emit_insn (gen_sse4_1_<code>v2qiv2si2 (operands[0], op1));
3796 DONE;
3797 })
3798
3799 (define_insn "sse4_1_<code>v2qiv2hi2"
3800 [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yw")
3801 (any_extend:V2HI
3802 (vec_select:V2QI
3803 (match_operand:V4QI 1 "register_operand" "Yr,*x,Yw")
3804 (parallel [(const_int 0) (const_int 1)]))))]
3805 "TARGET_SSE4_1"
3806 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
3807 [(set_attr "isa" "noavx,noavx,avx")
3808 (set_attr "type" "ssemov")
3809 (set_attr "prefix_extra" "1")
3810 (set_attr "prefix" "orig,orig,maybe_evex")
3811 (set_attr "mode" "TI")])
3812
3813 (define_expand "<insn>v2qiv2hi2"
3814 [(set (match_operand:V2HI 0 "register_operand")
3815 (any_extend:V2HI
3816 (match_operand:V2QI 1 "register_operand")))]
3817 "TARGET_SSE4_1"
3818 {
3819 rtx op1 = force_reg (V2QImode, operands[1]);
3820 op1 = lowpart_subreg (V4QImode, op1, V2QImode);
3821 emit_insn (gen_sse4_1_<code>v2qiv2hi2 (operands[0], op1));
3822 DONE;
3823 })
3824
3825 (define_insn "truncv2hiv2qi2"
3826 [(set (match_operand:V2QI 0 "register_operand" "=v")
3827 (truncate:V2QI
3828 (match_operand:V2HI 1 "register_operand" "v")))]
3829 "TARGET_AVX512VL && TARGET_AVX512BW"
3830 "vpmovwb\t{%1, %0|%0, %1}"
3831 [(set_attr "type" "ssemov")
3832 (set_attr "prefix" "evex")
3833 (set_attr "mode" "TI")])
3834
3835 (define_mode_iterator V2QI_V2HI [V2QI V2HI])
3836 (define_insn "truncv2si<mode>2"
3837 [(set (match_operand:V2QI_V2HI 0 "register_operand" "=v")
3838 (truncate:V2QI_V2HI
3839 (match_operand:V2SI 1 "register_operand" "v")))]
3840 "TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
3841 "vpmovd<mmxvecsize>\t{%1, %0|%0, %1}"
3842 [(set_attr "type" "ssemov")
3843 (set_attr "prefix" "evex")
3844 (set_attr "mode" "TI")])
3845
3846 ;; Pack/unpack vector modes
3847 (define_mode_attr mmxpackmode
3848 [(V4HI "V8QI") (V2SI "V4HI")])
3849
3850 (define_expand "vec_pack_trunc_<mode>"
3851 [(match_operand:<mmxpackmode> 0 "register_operand")
3852 (match_operand:MMXMODE24 1 "register_operand")
3853 (match_operand:MMXMODE24 2 "register_operand")]
3854 "TARGET_MMX_WITH_SSE"
3855 {
3856 rtx op1 = gen_lowpart (<mmxpackmode>mode, operands[1]);
3857 rtx op2 = gen_lowpart (<mmxpackmode>mode, operands[2]);
3858 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
3859 DONE;
3860 })
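;; Illustration only: on this little-endian target the truncated value of
;; each element is its low half, which lands at an even index once the
;; vector is viewed in <mmxpackmode>.  E.g. V4HI { 0x0102, 0x0304, ... }
;; viewed as V8QI is { 0x02, 0x01, 0x04, 0x03, ... }, and its even-indexed
;; bytes { 0x02, 0x04, ... } are exactly the truncated elements, which is
;; why the even-element extraction above implements vec_pack_trunc.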
3861
3862 (define_expand "vec_pack_trunc_v2hi"
3863 [(match_operand:V4QI 0 "register_operand")
3864 (match_operand:V2HI 1 "register_operand")
3865 (match_operand:V2HI 2 "register_operand")]
3866 "TARGET_SSE2"
3867 {
3868 rtx op1 = gen_lowpart (V4QImode, operands[1]);
3869 rtx op2 = gen_lowpart (V4QImode, operands[2]);
3870 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
3871 DONE;
3872 })
3873
3874 (define_mode_attr mmxunpackmode
3875 [(V8QI "V4HI") (V4HI "V2SI")])
3876
3877 (define_expand "vec_unpacks_lo_<mode>"
3878 [(match_operand:<mmxunpackmode> 0 "register_operand")
3879 (match_operand:MMXMODE12 1 "register_operand")]
3880 "TARGET_MMX_WITH_SSE"
3881 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
3882
3883 (define_expand "vec_unpacks_hi_<mode>"
3884 [(match_operand:<mmxunpackmode> 0 "register_operand")
3885 (match_operand:MMXMODE12 1 "register_operand")]
3886 "TARGET_MMX_WITH_SSE"
3887 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
3888
3889 (define_expand "vec_unpacku_lo_<mode>"
3890 [(match_operand:<mmxunpackmode> 0 "register_operand")
3891 (match_operand:MMXMODE12 1 "register_operand")]
3892 "TARGET_MMX_WITH_SSE"
3893 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
3894
3895 (define_expand "vec_unpacku_hi_<mode>"
3896 [(match_operand:<mmxunpackmode> 0 "register_operand")
3897 (match_operand:MMXMODE12 1 "register_operand")]
3898 "TARGET_MMX_WITH_SSE"
3899 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
3900
3901 (define_expand "vec_unpacks_lo_v4qi"
3902 [(match_operand:V2HI 0 "register_operand")
3903 (match_operand:V4QI 1 "register_operand")]
3904 "TARGET_SSE2"
3905 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
3906
3907 (define_expand "vec_unpacks_hi_v4qi"
3908 [(match_operand:V2HI 0 "register_operand")
3909 (match_operand:V4QI 1 "register_operand")]
3910 "TARGET_SSE2"
3911 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
3912
3913 (define_expand "vec_unpacku_lo_v4qi"
3914 [(match_operand:V2HI 0 "register_operand")
3915 (match_operand:V4QI 1 "register_operand")]
3916 "TARGET_SSE2"
3917 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
3918
3919 (define_expand "vec_unpacku_hi_v4qi"
3920 [(match_operand:V2HI 0 "register_operand")
3921 (match_operand:V4QI 1 "register_operand")]
3922 "TARGET_SSE2"
3923 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
3924
3925 (define_insn "*mmx_pinsrd"
3926 [(set (match_operand:V2SI 0 "register_operand" "=x,Yv")
3927 (vec_merge:V2SI
3928 (vec_duplicate:V2SI
3929 (match_operand:SI 2 "nonimmediate_operand" "rm,rm"))
3930 (match_operand:V2SI 1 "register_operand" "0,Yv")
3931 (match_operand:SI 3 "const_int_operand")))]
3932 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE
3933 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3934 < GET_MODE_NUNITS (V2SImode))"
3935 {
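  /* The vec_merge mask is a power of two selecting a single element;
     convert it to the element index expected by [v]pinsrd.  */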
3936 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3937 switch (which_alternative)
3938 {
3939 case 1:
3940 return "vpinsrd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3941 case 0:
3942 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
3943 default:
3944 gcc_unreachable ();
3945 }
3946 }
3947 [(set_attr "isa" "noavx,avx")
3948 (set_attr "prefix_extra" "1")
3949 (set_attr "type" "sselog")
3950 (set_attr "length_immediate" "1")
3951 (set_attr "prefix" "orig,vex")
3952 (set_attr "mode" "TI")])
3953
3954 (define_expand "mmx_pinsrw"
3955 [(set (match_operand:V4HI 0 "register_operand")
3956 (vec_merge:V4HI
3957 (vec_duplicate:V4HI
3958 (match_operand:SI 2 "nonimmediate_operand"))
3959 (match_operand:V4HI 1 "register_operand")
3960 (match_operand:SI 3 "const_0_to_3_operand")))]
3961 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3962 && (TARGET_SSE || TARGET_3DNOW_A)"
3963 {
3964 operands[2] = gen_lowpart (HImode, operands[2]);
3965 operands[3] = GEN_INT (1 << INTVAL (operands[3]));
3966 })
3967
3968 (define_insn "*mmx_pinsrw"
3969 [(set (match_operand:V4HI 0 "register_operand" "=y,x,YW")
3970 (vec_merge:V4HI
3971 (vec_duplicate:V4HI
3972 (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
3973 (match_operand:V4HI 1 "register_operand" "0,0,YW")
3974 (match_operand:SI 3 "const_int_operand")))]
3975 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
3976 && (TARGET_SSE || TARGET_3DNOW_A)
3977 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3978 < GET_MODE_NUNITS (V4HImode))"
3979 {
3980 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3981 switch (which_alternative)
3982 {
3983 case 2:
3984 if (MEM_P (operands[2]))
3985 return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3986 else
3987 return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
3988 case 1:
3989 case 0:
3990 if (MEM_P (operands[2]))
3991 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
3992 else
3993 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3994 default:
3995 gcc_unreachable ();
3996 }
3997 }
3998 [(set_attr "isa" "*,sse2_noavx,avx")
3999 (set_attr "mmx_isa" "native,*,*")
4000 (set_attr "type" "mmxcvt,sselog,sselog")
4001 (set_attr "length_immediate" "1")
4002 (set_attr "mode" "DI,TI,TI")])
4003
4004 (define_insn "*mmx_pinsrb"
4005 [(set (match_operand:V8QI 0 "register_operand" "=x,YW")
4006 (vec_merge:V8QI
4007 (vec_duplicate:V8QI
4008 (match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
4009 (match_operand:V8QI 1 "register_operand" "0,YW")
4010 (match_operand:SI 3 "const_int_operand")))]
4011 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE
4012 && ((unsigned) exact_log2 (INTVAL (operands[3]))
4013 < GET_MODE_NUNITS (V8QImode))"
4014 {
4015 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4016 switch (which_alternative)
4017 {
4018 case 1:
4019 if (MEM_P (operands[2]))
4020 return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4021 else
4022 return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
4023 case 0:
4024 if (MEM_P (operands[2]))
4025 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
4026 else
4027 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4028 default:
4029 gcc_unreachable ();
4030 }
4031 }
4032 [(set_attr "isa" "noavx,avx")
4033 (set_attr "type" "sselog")
4034 (set_attr "prefix_extra" "1")
4035 (set_attr "length_immediate" "1")
4036 (set_attr "prefix" "orig,vex")
4037 (set_attr "mode" "TI")])
4038
4039 (define_insn "*mmx_pextrw"
4040 [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,r,m")
4041 (vec_select:HI
4042 (match_operand:V4HI 1 "register_operand" "y,YW,YW")
4043 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
4044 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4045 && (TARGET_SSE || TARGET_3DNOW_A)"
4046 "@
4047 pextrw\t{%2, %1, %k0|%k0, %1, %2}
4048 %vpextrw\t{%2, %1, %k0|%k0, %1, %2}
4049 %vpextrw\t{%2, %1, %0|%0, %1, %2}"
4050 [(set_attr "isa" "*,sse2,sse4")
4051 (set_attr "mmx_isa" "native,*,*")
4052 (set_attr "type" "mmxcvt,sselog1,sselog1")
4053 (set_attr "length_immediate" "1")
4054 (set_attr "prefix" "orig,maybe_vex,maybe_vex")
4055 (set_attr "mode" "DI,TI,TI")])
4056
4057 (define_insn "*mmx_pextrw_zext"
4058 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
4059 (zero_extend:SWI48
4060 (vec_select:HI
4061 (match_operand:V4HI 1 "register_operand" "y,YW")
4062 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
4063 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4064 && (TARGET_SSE || TARGET_3DNOW_A)"
4065 "@
4066 pextrw\t{%2, %1, %k0|%k0, %1, %2}
4067 %vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
4068 [(set_attr "isa" "*,sse2")
4069 (set_attr "mmx_isa" "native,*")
4070 (set_attr "type" "mmxcvt,sselog1")
4071 (set_attr "length_immediate" "1")
4072 (set_attr "prefix" "orig,maybe_vex")
4073 (set_attr "mode" "DI,TI")])
4074
4075 (define_insn "*mmx_pextrb"
4076 [(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")
4077 (vec_select:QI
4078 (match_operand:V8QI 1 "register_operand" "YW,YW")
4079 (parallel [(match_operand:SI 2 "const_0_to_7_operand")])))]
4080 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4081 "@
4082 %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
4083 %vpextrb\t{%2, %1, %0|%0, %1, %2}"
4084 [(set_attr "type" "sselog1")
4085 (set_attr "prefix_extra" "1")
4086 (set_attr "length_immediate" "1")
4087 (set_attr "prefix" "maybe_vex")
4088 (set_attr "mode" "TI")])
4089
4090 (define_insn "*mmx_pextrb_zext"
4091 [(set (match_operand:SWI248 0 "register_operand" "=r")
4092 (zero_extend:SWI248
4093 (vec_select:QI
4094 (match_operand:V8QI 1 "register_operand" "YW")
4095 (parallel [(match_operand:SI 2 "const_0_to_7_operand")]))))]
4096 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4097 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
4098 [(set_attr "type" "sselog1")
4099 (set_attr "prefix_extra" "1")
4100 (set_attr "length_immediate" "1")
4101 (set_attr "prefix" "maybe_vex")
4102 (set_attr "mode" "TI")])
4103
4104 (define_insn "mmx_pshufbv8qi3"
4105 [(set (match_operand:V8QI 0 "register_operand" "=x,Yw")
4106 (unspec:V8QI
4107 [(match_operand:V8QI 1 "register_operand" "0,Yw")
4108 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")]
4109 UNSPEC_PSHUFB))]
4110 "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
4111 "@
4112 pshufb\t{%2, %0|%0, %2}
4113 vpshufb\t{%2, %1, %0|%0, %1, %2}"
4114 [(set_attr "isa" "noavx,avx")
4115 (set_attr "type" "sselog1")
4116 (set_attr "prefix_extra" "1")
4117 (set_attr "prefix" "orig,maybe_evex")
4118 (set_attr "btver2_decode" "vector")
4119 (set_attr "mode" "TI")])
4120
4121 (define_insn "mmx_pshufbv4qi3"
4122 [(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
4123 (unspec:V4QI
4124 [(match_operand:V4QI 1 "register_operand" "0,Yw")
4125 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")]
4126 UNSPEC_PSHUFB))]
4127 "TARGET_SSSE3"
4128 "@
4129 pshufb\t{%2, %0|%0, %2}
4130 vpshufb\t{%2, %1, %0|%0, %1, %2}"
4131 [(set_attr "isa" "noavx,avx")
4132 (set_attr "type" "sselog1")
4133 (set_attr "prefix_extra" "1")
4134 (set_attr "prefix" "orig,maybe_evex")
4135 (set_attr "btver2_decode" "vector")
4136 (set_attr "mode" "TI")])
4137
4138 (define_expand "mmx_pshufw"
4139 [(match_operand:V4HI 0 "register_operand")
4140 (match_operand:V4HI 1 "register_mmxmem_operand")
4141 (match_operand:SI 2 "const_int_operand")]
4142 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4143 && (TARGET_SSE || TARGET_3DNOW_A)"
4144 {
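  /* Split the 8-bit shuffle immediate into the four 2-bit element
     selectors expected by mmx_pshufw_1.  */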
4145 int mask = INTVAL (operands[2]);
4146 emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
4147 GEN_INT ((mask >> 0) & 3),
4148 GEN_INT ((mask >> 2) & 3),
4149 GEN_INT ((mask >> 4) & 3),
4150 GEN_INT ((mask >> 6) & 3)));
4151 DONE;
4152 })
4153
4154 (define_insn "mmx_pshufw_1"
4155 [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
4156 (vec_select:V4HI
4157 (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
4158 (parallel [(match_operand 2 "const_0_to_3_operand")
4159 (match_operand 3 "const_0_to_3_operand")
4160 (match_operand 4 "const_0_to_3_operand")
4161 (match_operand 5 "const_0_to_3_operand")])))]
4162 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4163 && (TARGET_SSE || TARGET_3DNOW_A)"
4164 {
4165 int mask = 0;
4166 mask |= INTVAL (operands[2]) << 0;
4167 mask |= INTVAL (operands[3]) << 2;
4168 mask |= INTVAL (operands[4]) << 4;
4169 mask |= INTVAL (operands[5]) << 6;
4170 operands[2] = GEN_INT (mask);
4171
4172 switch (which_alternative)
4173 {
4174 case 0:
4175 return "pshufw\t{%2, %1, %0|%0, %1, %2}";
4176 case 1:
4177 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
4178 default:
4179 gcc_unreachable ();
4180 }
4181 }
4182 [(set_attr "isa" "*,sse2")
4183 (set_attr "mmx_isa" "native,*")
4184 (set_attr "type" "mmxcvt,sselog1")
4185 (set_attr "length_immediate" "1")
4186 (set_attr "mode" "DI,TI")])
4187
4188 (define_insn "*mmx_pshufd_1"
4189 [(set (match_operand:V2SI 0 "register_operand" "=Yv")
4190 (vec_select:V2SI
4191 (match_operand:V2SI 1 "register_operand" "Yv")
4192 (parallel [(match_operand 2 "const_0_to_1_operand")
4193 (match_operand 3 "const_0_to_1_operand")])))]
4194 "TARGET_MMX_WITH_SSE"
4195 {
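  /* Build the pshufd immediate: the two low 2-bit fields come from the
     selectors in the pattern, while fields 2 and 3 are fixed at 2 and 3 so
     the high dwords of the XMM register are copied through unchanged (they
     are don't-care for the V2SI result).  */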
4196 int mask = 0;
4197 mask |= INTVAL (operands[2]) << 0;
4198 mask |= INTVAL (operands[3]) << 2;
4199 mask |= 2 << 4;
4200 mask |= 3 << 6;
4201 operands[2] = GEN_INT (mask);
4202
4203 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
4204 }
4205 [(set_attr "type" "sselog1")
4206 (set_attr "prefix_data16" "1")
4207 (set_attr "length_immediate" "1")
4208 (set_attr "mode" "TI")])
4209
4210 (define_insn "*mmx_pblendw64"
4211 [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,x")
4212 (vec_merge:V4HI
4213 (match_operand:V4HI 2 "register_operand" "Yr,*x,x")
4214 (match_operand:V4HI 1 "register_operand" "0,0,x")
4215 (match_operand:SI 3 "const_0_to_15_operand")))]
4216 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4217 "@
4218 pblendw\t{%3, %2, %0|%0, %2, %3}
4219 pblendw\t{%3, %2, %0|%0, %2, %3}
4220 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4221 [(set_attr "isa" "noavx,noavx,avx")
4222 (set_attr "type" "ssemov")
4223 (set_attr "prefix_extra" "1")
4224 (set_attr "length_immediate" "1")
4225 (set_attr "prefix" "orig,orig,vex")
4226 (set_attr "mode" "TI")])
4227
4228 (define_insn "*mmx_pblendw32"
4229 [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,x")
4230 (vec_merge:V2HI
4231 (match_operand:V2HI 2 "register_operand" "Yr,*x,x")
4232 (match_operand:V2HI 1 "register_operand" "0,0,x")
4233 (match_operand:SI 3 "const_0_to_7_operand")))]
4234 "TARGET_SSE4_1"
4235 "@
4236 pblendw\t{%3, %2, %0|%0, %2, %3}
4237 pblendw\t{%3, %2, %0|%0, %2, %3}
4238 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4239 [(set_attr "isa" "noavx,noavx,avx")
4240 (set_attr "type" "ssemov")
4241 (set_attr "prefix_extra" "1")
4242 (set_attr "length_immediate" "1")
4243 (set_attr "prefix" "orig,orig,vex")
4244 (set_attr "mode" "TI")])
4245
4246 ;; Optimize a V2SImode element swap done in place on memory (load, swap
4247 ;; the two elements, store back) into a DImode rotate of that memory by 32.
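;; Illustration only: swapping the two 32-bit halves of a 64-bit word is the
;; same as rotating it by 32 bits, as in this hypothetical C helper:
;;
;;   unsigned long long swap_halves (unsigned long long x)
;;   {
;;     return (x << 32) | (x >> 32);
;;   }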
4248 (define_split
4249 [(set (match_operand:V2SI 0 "memory_operand")
4250 (vec_select:V2SI (match_dup 0)
4251 (parallel [(const_int 1) (const_int 0)])))]
4252 "TARGET_64BIT && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
4253 [(set (match_dup 0)
4254 (rotate:DI (match_dup 0) (const_int 32)))]
4255 "operands[0] = adjust_address (operands[0], DImode, 0);")
4256
4257 (define_insn "mmx_pswapdv2si2"
4258 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
4259 (vec_select:V2SI
4260 (match_operand:V2SI 1 "register_mmxmem_operand" "ym,Yv")
4261 (parallel [(const_int 1) (const_int 0)])))]
4262 "TARGET_3DNOW_A"
4263 "@
4264 pswapd\t{%1, %0|%0, %1}
4265 %vpshufd\t{$0xe1, %1, %0|%0, %1, 0xe1}"
4266 [(set_attr "isa" "*,sse2")
4267 (set_attr "mmx_isa" "native,*")
4268 (set_attr "type" "mmxcvt,sselog1")
4269 (set_attr "prefix_extra" "1,*")
4270 (set_attr "length_immediate" "*,1")
4271 (set_attr "mode" "DI,TI")])
4272
4273 (define_insn "*vec_dupv4hi"
4274 [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
4275 (vec_duplicate:V4HI
4276 (truncate:HI
4277 (match_operand:SI 1 "register_operand" "0,Yw"))))]
4278 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4279 && (TARGET_SSE || TARGET_3DNOW_A)"
4280 "@
4281 pshufw\t{$0, %0, %0|%0, %0, 0}
4282 %vpshuflw\t{$0, %1, %0|%0, %1, 0}"
4283 [(set_attr "isa" "*,sse2")
4284 (set_attr "mmx_isa" "native,*")
4285 (set_attr "type" "mmxcvt,sselog1")
4286 (set_attr "length_immediate" "1")
4287 (set_attr "mode" "DI,TI")])
4288
4289
4290 (define_insn "*vec_dupv2si"
4291 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
4292 (vec_duplicate:V2SI
4293 (match_operand:SI 1 "register_operand" "0,Yv")))]
4294 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4295 "@
4296 punpckldq\t%0, %0
4297 %vpshufd\t{$0xe0, %1, %0|%0, %1, 0xe0}"
4298 [(set_attr "isa" "*,sse2")
4299 (set_attr "mmx_isa" "native,*")
4300 (set_attr "type" "mmxcvt,sselog1")
4301 (set_attr "prefix_data16" "*,1")
4302 (set_attr "length_immediate" "*,1")
4303 (set_attr "mode" "DI,TI")])
4304
4305 (define_insn "*mmx_concatv2si"
4306 [(set (match_operand:V2SI 0 "register_operand" "=y,y")
4307 (vec_concat:V2SI
4308 (match_operand:SI 1 "nonimmediate_operand" " 0,rm")
4309 (match_operand:SI 2 "nonimm_or_0_operand" "ym,C")))]
4310 "TARGET_MMX && !TARGET_SSE"
4311 "@
4312 punpckldq\t{%2, %0|%0, %2}
4313 movd\t{%1, %0|%0, %1}"
4314 [(set_attr "type" "mmxcvt,mmxmov")
4315 (set_attr "mode" "DI")])
4316
4317 (define_expand "vec_setv2si"
4318 [(match_operand:V2SI 0 "register_operand")
4319 (match_operand:SI 1 "register_operand")
4320 (match_operand 2 "vec_setm_mmx_operand")]
4321 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4322 {
4323 if (CONST_INT_P (operands[2]))
4324 ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
4325 INTVAL (operands[2]));
4326 else
4327 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
4328 DONE;
4329 })
4330
4331 ;; Avoid combining registers from different units in a single alternative;
4332 ;; see the comment above the inline_secondary_memory_needed function in i386.cc.
4333 (define_insn_and_split "*vec_extractv2si_0"
4334 [(set (match_operand:SI 0 "nonimmediate_operand" "=x,m,y, m,r,r")
4335 (vec_select:SI
4336 (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m,x")
4337 (parallel [(const_int 0)])))]
4338 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4339 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4340 "#"
4341 "&& reload_completed"
4342 [(set (match_dup 0) (match_dup 1))]
4343 "operands[1] = gen_lowpart (SImode, operands[1]);"
4344 [(set_attr "isa" "*,*,*,*,*,sse2")
4345 (set_attr "mmx_isa" "*,*,native,native,*,*")
4346 (set (attr "preferred_for_speed")
4347 (cond [(eq_attr "alternative" "5")
4348 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4349 ]
4350 (symbol_ref "true")))])
4351
4352 (define_insn "*vec_extractv2si_0_zext_sse4"
4353 [(set (match_operand:DI 0 "register_operand" "=r,x")
4354 (zero_extend:DI
4355 (vec_select:SI
4356 (match_operand:V2SI 1 "register_operand" "x,x")
4357 (parallel [(const_int 0)]))))]
4358 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE4_1"
4359 "#"
4360 [(set_attr "isa" "x64,*")
4361 (set (attr "preferred_for_speed")
4362 (cond [(eq_attr "alternative" "0")
4363 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4364 ]
4365 (symbol_ref "true")))])
4366
4367 (define_insn "*vec_extractv2si_0_zext"
4368 [(set (match_operand:DI 0 "register_operand" "=r")
4369 (zero_extend:DI
4370 (vec_select:SI
4371 (match_operand:V2SI 1 "register_operand" "x")
4372 (parallel [(const_int 0)]))))]
4373 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4374 && TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
4375 "#")
4376
4377 (define_split
4378 [(set (match_operand:DI 0 "register_operand")
4379 (zero_extend:DI
4380 (vec_select:SI
4381 (match_operand:V2SI 1 "register_operand")
4382 (parallel [(const_int 0)]))))]
4383 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4384 && TARGET_SSE2 && reload_completed"
4385 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
4386 "operands[1] = gen_lowpart (SImode, operands[1]);")
4387
4388 ;; Avoid combining registers from different units in a single alternative;
4389 ;; see the comment above the inline_secondary_memory_needed function in i386.cc.
4390 (define_insn "*vec_extractv2si_1"
4391 [(set (match_operand:SI 0 "nonimmediate_operand" "=y,rm,x,x,y,x,r")
4392 (vec_select:SI
4393 (match_operand:V2SI 1 "nonimmediate_operand" " 0,x ,x,0,o,o,o")
4394 (parallel [(const_int 1)])))]
4395 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4396 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4397 "@
4398 punpckhdq\t%0, %0
4399 %vpextrd\t{$1, %1, %0|%0, %1, 1}
4400 %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
4401 shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}
4402 #
4403 #
4404 #"
4405 [(set_attr "isa" "*,sse4,sse2,noavx,*,*,*")
4406 (set_attr "mmx_isa" "native,*,*,*,native,*,*")
4407 (set_attr "type" "mmxcvt,ssemov,sseshuf1,sseshuf1,mmxmov,ssemov,imov")
4408 (set (attr "length_immediate")
4409 (if_then_else (eq_attr "alternative" "1,2,3")
4410 (const_string "1")
4411 (const_string "*")))
4412 (set_attr "prefix" "orig,maybe_vex,maybe_vex,orig,orig,orig,orig")
4413 (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")])
4414
4415 (define_split
4416 [(set (match_operand:SI 0 "register_operand")
4417 (vec_select:SI
4418 (match_operand:V2SI 1 "memory_operand")
4419 (parallel [(const_int 1)])))]
4420 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
4421 [(set (match_dup 0) (match_dup 1))]
4422 "operands[1] = adjust_address (operands[1], SImode, 4);")
4423
4424 (define_insn "*vec_extractv2si_1_zext"
4425 [(set (match_operand:DI 0 "register_operand" "=r")
4426 (zero_extend:DI
4427 (vec_select:SI
4428 (match_operand:V2SI 1 "register_operand" "x")
4429 (parallel [(const_int 1)]))))]
4430 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4431 && TARGET_64BIT && TARGET_SSE4_1"
4432 "%vpextrd\t{$1, %1, %k0|%k0, %1, 1}"
4433 [(set_attr "type" "sselog1")
4434 (set_attr "prefix_extra" "1")
4435 (set_attr "length_immediate" "1")
4436 (set_attr "prefix" "maybe_vex")
4437 (set_attr "mode" "TI")])
4438
4439 (define_insn_and_split "*vec_extractv2si_zext_mem"
4440 [(set (match_operand:DI 0 "register_operand" "=y,x,r")
4441 (zero_extend:DI
4442 (vec_select:SI
4443 (match_operand:V2SI 1 "memory_operand" "o,o,o")
4444 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
4445 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_64BIT"
4446 "#"
4447 "&& reload_completed"
4448 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
4449 {
4450 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
4451 }
4452 [(set_attr "isa" "*,sse2,*")
4453 (set_attr "mmx_isa" "native,*,*")])
4454
4455 (define_expand "vec_extractv2sisi"
4456 [(match_operand:SI 0 "register_operand")
4457 (match_operand:V2SI 1 "register_operand")
4458 (match_operand 2 "const_int_operand")]
4459 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4460 {
4461 ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
4462 operands[1], INTVAL (operands[2]));
4463 DONE;
4464 })
4465
4466 (define_expand "vec_initv2sisi"
4467 [(match_operand:V2SI 0 "register_operand")
4468 (match_operand 1)]
4469 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
4470 {
4471 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
4472 operands[1]);
4473 DONE;
4474 })
4475
4476 (define_expand "vec_setv4hi"
4477 [(match_operand:V4HI 0 "register_operand")
4478 (match_operand:HI 1 "register_operand")
4479 (match_operand 2 "vec_setm_mmx_operand")]
4480 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4481 {
4482 if (CONST_INT_P (operands[2]))
4483 ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
4484 INTVAL (operands[2]));
4485 else
4486 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
4487 DONE;
4488 })
4489
4490 (define_expand "vec_extractv4hihi"
4491 [(match_operand:HI 0 "register_operand")
4492 (match_operand:V4HI 1 "register_operand")
4493 (match_operand 2 "const_int_operand")]
4494 "TARGET_MMX || TARGET_MMX_WITH_SSE"
4495 {
4496 ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
4497 operands[1], INTVAL (operands[2]));
4498 DONE;
4499 })
4500
4501 (define_expand "vec_initv4hihi"
4502 [(match_operand:V4HI 0 "register_operand")
4503 (match_operand 1)]
4504 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
4505 {
4506 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
4507 operands[1]);
4508 DONE;
4509 })
4510
4511 (define_expand "vec_setv8qi"
4512 [(match_operand:V8QI 0 "register_operand")
4513 (match_operand:QI 1 "register_operand")
4514 (match_operand 2 "vec_setm_mmx_operand")]
4515 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4516 {
4517 if (CONST_INT_P (operands[2]))
4518 ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
4519 INTVAL (operands[2]));
4520 else
4521 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
4522 DONE;
4523 })
4524
4525 (define_expand "vec_extractv8qiqi"
4526 [(match_operand:QI 0 "register_operand")
4527 (match_operand:V8QI 1 "register_operand")
4528 (match_operand 2 "const_int_operand")]
4529 "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
4530 {
4531 ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
4532 operands[1], INTVAL (operands[2]));
4533 DONE;
4534 })
4535
4536 (define_expand "vec_initv8qiqi"
4537 [(match_operand:V8QI 0 "register_operand")
4538 (match_operand 1)]
4539 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
4540 {
4541 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
4542 operands[1]);
4543 DONE;
4544 })
4545
4546 (define_insn "*pinsrw"
4547 [(set (match_operand:V2HI 0 "register_operand" "=x,YW")
4548 (vec_merge:V2HI
4549 (vec_duplicate:V2HI
4550 (match_operand:HI 2 "nonimmediate_operand" "rm,rm"))
4551 (match_operand:V2HI 1 "register_operand" "0,YW")
4552 (match_operand:SI 3 "const_int_operand")))]
4553 "TARGET_SSE2
4554 && ((unsigned) exact_log2 (INTVAL (operands[3]))
4555 < GET_MODE_NUNITS (V2HImode))"
4556 {
4557 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4558 switch (which_alternative)
4559 {
4560 case 1:
4561 if (MEM_P (operands[2]))
4562 return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4563 else
4564 return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
4565 case 0:
4566 if (MEM_P (operands[2]))
4567 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
4568 else
4569 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4570 default:
4571 gcc_unreachable ();
4572 }
4573 }
4574 [(set_attr "isa" "noavx,avx")
4575 (set_attr "type" "sselog")
4576 (set_attr "length_immediate" "1")
4577 (set_attr "mode" "TI")])
4578
4579 (define_insn "*pinsrb"
4580 [(set (match_operand:V4QI 0 "register_operand" "=x,YW")
4581 (vec_merge:V4QI
4582 (vec_duplicate:V4QI
4583 (match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
4584 (match_operand:V4QI 1 "register_operand" "0,YW")
4585 (match_operand:SI 3 "const_int_operand")))]
4586 "TARGET_SSE4_1
4587 && ((unsigned) exact_log2 (INTVAL (operands[3]))
4588 < GET_MODE_NUNITS (V4QImode))"
4589 {
4590 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4591 switch (which_alternative)
4592 {
4593 case 1:
4594 if (MEM_P (operands[2]))
4595 return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4596 else
4597 return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
4598 case 0:
4599 if (MEM_P (operands[2]))
4600 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
4601 else
4602 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4603 default:
4604 gcc_unreachable ();
4605 }
4606 }
4607 [(set_attr "isa" "noavx,avx")
4608 (set_attr "type" "sselog")
4609 (set_attr "prefix_extra" "1")
4610 (set_attr "length_immediate" "1")
4611 (set_attr "prefix" "orig,vex")
4612 (set_attr "mode" "TI")])
4613
4614 (define_insn "*pextrw"
4615 [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,m")
4616 (vec_select:HI
4617 (match_operand:V2HI 1 "register_operand" "YW,YW")
4618 (parallel [(match_operand:SI 2 "const_0_to_1_operand")])))]
4619 "TARGET_SSE2"
4620 "@
4621 %vpextrw\t{%2, %1, %k0|%k0, %1, %2}
4622 %vpextrw\t{%2, %1, %0|%0, %1, %2}"
4623 [(set_attr "isa" "*,sse4")
4624 (set_attr "type" "sselog1")
4625 (set_attr "length_immediate" "1")
4626 (set_attr "prefix" "maybe_vex")
4627 (set_attr "mode" "TI")])
4628
4629 (define_insn "*pextrw_zext"
4630 [(set (match_operand:SWI48 0 "register_operand" "=r")
4631 (zero_extend:SWI48
4632 (vec_select:HI
4633 (match_operand:V2HI 1 "register_operand" "YW")
4634 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
4635 "TARGET_SSE2"
4636 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
4637 [(set_attr "type" "sselog1")
4638 (set_attr "length_immediate" "1")
4639 (set_attr "prefix" "maybe_vex")
4640 (set_attr "mode" "TI")])
4641
4642 (define_insn "*pextrb"
4643 [(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")
4644 (vec_select:QI
4645 (match_operand:V4QI 1 "register_operand" "YW,YW")
4646 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
4647 "TARGET_SSE4_1"
4648 "@
4649 %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
4650 %vpextrb\t{%2, %1, %0|%0, %1, %2}"
4651 [(set_attr "type" "sselog1")
4652 (set_attr "prefix_extra" "1")
4653 (set_attr "length_immediate" "1")
4654 (set_attr "prefix" "maybe_vex")
4655 (set_attr "mode" "TI")])
4656
4657 (define_insn "*pextrb_zext"
4658 [(set (match_operand:SWI248 0 "register_operand" "=r")
4659 (zero_extend:SWI248
4660 (vec_select:QI
4661 (match_operand:V4QI 1 "register_operand" "YW")
4662 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
4663 "TARGET_SSE4_1"
4664 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
4665 [(set_attr "type" "sselog1")
4666 (set_attr "prefix_extra" "1")
4667 (set_attr "length_immediate" "1")
4668 (set_attr "prefix" "maybe_vex")
4669 (set_attr "mode" "TI")])
4670
4671 (define_expand "vec_setv2hi"
4672 [(match_operand:V2HI 0 "register_operand")
4673 (match_operand:HI 1 "register_operand")
4674 (match_operand 2 "vec_setm_sse41_operand")]
4675 "TARGET_SSE2"
4676 {
4677 if (CONST_INT_P (operands[2]))
4678 ix86_expand_vector_set (false, operands[0], operands[1],
4679 INTVAL (operands[2]));
4680 else
4681 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
4682 DONE;
4683 })
4684
4685 (define_expand "vec_extractv2hihi"
4686 [(match_operand:HI 0 "register_operand")
4687 (match_operand:V2HI 1 "register_operand")
4688 (match_operand 2 "const_int_operand")]
4689 "TARGET_SSE2"
4690 {
4691 ix86_expand_vector_extract (false, operands[0],
4692 operands[1], INTVAL (operands[2]));
4693 DONE;
4694 })
4695
4696 (define_expand "vec_setv4qi"
4697 [(match_operand:V4QI 0 "register_operand")
4698 (match_operand:QI 1 "register_operand")
4699 (match_operand 2 "vec_setm_mmx_operand")]
4700 "TARGET_SSE4_1"
4701 {
4702 if (CONST_INT_P (operands[2]))
4703 ix86_expand_vector_set (false, operands[0], operands[1],
4704 INTVAL (operands[2]));
4705 else
4706 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
4707 DONE;
4708 })
4709
4710 (define_expand "vec_extractv4qiqi"
4711 [(match_operand:QI 0 "register_operand")
4712 (match_operand:V4QI 1 "register_operand")
4713 (match_operand 2 "const_int_operand")]
4714 "TARGET_SSE4_1"
4715 {
4716 ix86_expand_vector_extract (false, operands[0],
4717 operands[1], INTVAL (operands[2]));
4718 DONE;
4719 })
4720
4721 (define_insn_and_split "*punpckwd"
4722 [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
4723 (vec_select:V2HI
4724 (vec_concat:V4HI
4725 (match_operand:V2HI 1 "register_operand" "0,Yw")
4726 (match_operand:V2HI 2 "register_operand" "x,Yw"))
4727 (parallel [(match_operand 3 "const_0_to_3_operand")
4728 (match_operand 4 "const_0_to_3_operand")])))]
4729 "TARGET_SSE2"
4730 "#"
4731 "&& reload_completed"
4732 [(set (match_dup 5)
4733 (vec_select:V8HI
4734 (match_dup 5)
4735 (parallel [(match_dup 3) (match_dup 4)
4736 (const_int 2) (const_int 3)
4737 (const_int 4) (const_int 5)
4738 (const_int 6) (const_int 7)])))]
4739 {
4740 rtx dest = lowpart_subreg (V8HImode, operands[0], V2HImode);
4741 rtx op1 = lowpart_subreg (V8HImode, operands[1], V2HImode);
4742 rtx op2 = lowpart_subreg (V8HImode, operands[2], V2HImode);
4743
4744 emit_insn (gen_vec_interleave_lowv8hi (dest, op1, op2));
4745
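  /* The interleave above lays the elements out as { op1[0], op2[0],
     op1[1], op2[1], ... }, so selector i of the original
     (vec_concat op1 op2) is found at position map[i] of DEST.  */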
4746 static const int map[4] = { 0, 2, 1, 3 };
4747
4748 int sel0 = map[INTVAL (operands[3])];
4749 int sel1 = map[INTVAL (operands[4])];
4750
4751 if (sel0 == 0 && sel1 == 1)
4752 DONE;
4753
4754 operands[3] = GEN_INT (sel0);
4755 operands[4] = GEN_INT (sel1);
4756 operands[5] = dest;
4757 }
4758 [(set_attr "isa" "noavx,avx")
4759 (set_attr "type" "sselog")
4760 (set_attr "mode" "TI")])
4761
4762 (define_insn "*pshufw_1"
4763 [(set (match_operand:V2HI 0 "register_operand" "=Yw")
4764 (vec_select:V2HI
4765 (match_operand:V2HI 1 "register_operand" "Yw")
4766 (parallel [(match_operand 2 "const_0_to_1_operand")
4767 (match_operand 3 "const_0_to_1_operand")])))]
4768 "TARGET_SSE2"
4769 {
4770 int mask = 0;
4771 mask |= INTVAL (operands[2]) << 0;
4772 mask |= INTVAL (operands[3]) << 2;
4773 mask |= 2 << 4;
4774 mask |= 3 << 6;
4775 operands[2] = GEN_INT (mask);
4776
4777 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
4778 }
4779 [(set_attr "type" "sselog1")
4780 (set_attr "length_immediate" "1")
4781 (set_attr "mode" "TI")])
4782
4783 (define_insn "*vec_dupv2hi"
4784 [(set (match_operand:V2HI 0 "register_operand" "=Yw")
4785 (vec_duplicate:V2HI
4786 (truncate:HI
4787 (match_operand:SI 1 "register_operand" "Yw"))))]
4788 "TARGET_SSE2"
4789 "%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
4790 [(set_attr "type" "sselog1")
4791 (set_attr "length_immediate" "1")
4792 (set_attr "mode" "TI")])
4793
4794 (define_expand "vec_initv2hihi"
4795 [(match_operand:V2HI 0 "register_operand")
4796 (match_operand 1)]
4797 "TARGET_SSE2"
4798 {
4799 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
4800 operands[1]);
4801 DONE;
4802 })
4803
4804 (define_expand "vec_initv4qiqi"
4805 [(match_operand:V4QI 0 "register_operand")
4806 (match_operand 1)]
4807 "TARGET_SSE2"
4808 {
4809 ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
4810 operands[1]);
4811 DONE;
4812 })
4813
4814 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4815 ;;
4816 ;; Miscellaneous
4817 ;;
4818 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4819
4820 (define_expand "mmx_uavg<mode>3"
4821 [(set (match_operand:MMXMODE12 0 "register_operand")
4822 (truncate:MMXMODE12
4823 (lshiftrt:<mmxdoublemode>
4824 (plus:<mmxdoublemode>
4825 (plus:<mmxdoublemode>
4826 (zero_extend:<mmxdoublemode>
4827 (match_operand:MMXMODE12 1 "register_mmxmem_operand"))
4828 (zero_extend:<mmxdoublemode>
4829 (match_operand:MMXMODE12 2 "register_mmxmem_operand")))
4830 (match_dup 3))
4831 (const_int 1))))]
4832 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4833 && (TARGET_SSE || TARGET_3DNOW)"
4834 {
4835 operands[3] = CONST1_RTX(<mmxdoublemode>mode);
4836 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
4837 })
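;; Illustration only: the widen/add/shift RTL above is the rounding
;; (round-half-up) unsigned average; per element (byte case), a hypothetical
;; C helper:
;;
;;   unsigned char uavg_ceil (unsigned char a, unsigned char b)
;;   {
;;     return (unsigned char) (((unsigned int) a + b + 1) >> 1);
;;   }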
4838
4839 (define_insn "*mmx_uavgv8qi3"
4840 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
4841 (truncate:V8QI
4842 (lshiftrt:V8HI
4843 (plus:V8HI
4844 (plus:V8HI
4845 (zero_extend:V8HI
4846 (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw"))
4847 (zero_extend:V8HI
4848 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))
4849 (const_vector:V8HI [(const_int 1) (const_int 1)
4850 (const_int 1) (const_int 1)
4851 (const_int 1) (const_int 1)
4852 (const_int 1) (const_int 1)]))
4853 (const_int 1))))]
4854 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4855 && (TARGET_SSE || TARGET_3DNOW)
4856 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4857 {
4858 switch (which_alternative)
4859 {
4860 case 2:
4861 return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
4862 case 1:
4863 case 0:
4864 /* These two instructions have the same operation, but their encoding
4865 is different. Prefer the one that is de facto standard. */
4866 if (TARGET_SSE || TARGET_3DNOW_A)
4867 return "pavgb\t{%2, %0|%0, %2}";
4868 else
4869 return "pavgusb\t{%2, %0|%0, %2}";
4870 default:
4871 gcc_unreachable ();
4872 }
4873 }
4874 [(set_attr "isa" "*,sse2_noavx,avx")
4875 (set_attr "mmx_isa" "native,*,*")
4876 (set_attr "type" "mmxshft,sseiadd,sseiadd")
4877 (set (attr "prefix_extra")
4878 (if_then_else
4879 (not (ior (match_test "TARGET_SSE")
4880 (match_test "TARGET_3DNOW_A")))
4881 (const_string "1")
4882 (const_string "*")))
4883 (set_attr "mode" "DI,TI,TI")])
4884
4885 (define_insn "*mmx_uavgv4hi3"
4886 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
4887 (truncate:V4HI
4888 (lshiftrt:V4SI
4889 (plus:V4SI
4890 (plus:V4SI
4891 (zero_extend:V4SI
4892 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
4893 (zero_extend:V4SI
4894 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
4895 (const_vector:V4SI [(const_int 1) (const_int 1)
4896 (const_int 1) (const_int 1)]))
4897 (const_int 1))))]
4898 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
4899 && (TARGET_SSE || TARGET_3DNOW_A)
4900 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4901 "@
4902 pavgw\t{%2, %0|%0, %2}
4903 pavgw\t{%2, %0|%0, %2}
4904 vpavgw\t{%2, %1, %0|%0, %1, %2}"
4905 [(set_attr "isa" "*,sse2_noavx,avx")
4906 (set_attr "mmx_isa" "native,*,*")
4907 (set_attr "type" "mmxshft,sseiadd,sseiadd")
4908 (set_attr "mode" "DI,TI,TI")])
4909
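;; The *_ceil variants expose the same rounding-up average under the
;; standard uavg<mode>3_ceil optab name: for V8QI/V4HI when the MMX
;; registers are emulated with SSE, and for the 4- and 2-byte partial
;; vectors directly via the SSE2 pavgb/pavgw instructions operating in
;; XMM registers.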
(define_expand "uavg<mode>3_ceil"
  [(set (match_operand:MMXMODE12 0 "register_operand")
        (truncate:MMXMODE12
          (lshiftrt:<mmxdoublemode>
            (plus:<mmxdoublemode>
              (plus:<mmxdoublemode>
                (zero_extend:<mmxdoublemode>
                  (match_operand:MMXMODE12 1 "register_operand"))
                (zero_extend:<mmxdoublemode>
                  (match_operand:MMXMODE12 2 "register_operand")))
              (match_dup 3))
            (const_int 1))))]
  "TARGET_MMX_WITH_SSE"
  "operands[3] = CONST1_RTX(<mmxdoublemode>mode);")

(define_insn "uavgv4qi3_ceil"
  [(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
        (truncate:V4QI
          (lshiftrt:V4HI
            (plus:V4HI
              (plus:V4HI
                (zero_extend:V4HI
                  (match_operand:V4QI 1 "register_operand" "%0,Yw"))
                (zero_extend:V4HI
                  (match_operand:V4QI 2 "register_operand" "x,Yw")))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
  "@
   pavgb\t{%2, %0|%0, %2}
   vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

(define_insn "uavgv2qi3_ceil"
  [(set (match_operand:V2QI 0 "register_operand" "=x,Yw")
        (truncate:V2QI
          (lshiftrt:V2HI
            (plus:V2HI
              (plus:V2HI
                (zero_extend:V2HI
                  (match_operand:V2QI 1 "register_operand" "%0,Yw"))
                (zero_extend:V2HI
                  (match_operand:V2QI 2 "register_operand" "x,Yw")))
              (const_vector:V2HI [(const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
  "@
   pavgb\t{%2, %0|%0, %2}
   vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

(define_insn "uavgv2hi3_ceil"
  [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
        (truncate:V2HI
          (lshiftrt:V2SI
            (plus:V2SI
              (plus:V2SI
                (zero_extend:V2SI
                  (match_operand:V2HI 1 "register_operand" "%0,Yw"))
                (zero_extend:V2SI
                  (match_operand:V2HI 2 "register_operand" "x,Yw")))
              (const_vector:V2SI [(const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
  "@
   pavgw\t{%2, %0|%0, %2}
   vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

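;; Sum of absolute differences: psadbw subtracts the eight unsigned
;; byte elements of the two inputs, takes the absolute value of each
;; difference and adds them up, zero-extending the 16-bit sum into the
;; 64-bit destination.  Typically reached via the _mm_sad_pu8 builtin.
;;
;; Illustrative C sketch (an informal scalar model, not part of the
;; machine description):
;;   unsigned short sad = 0;
;;   for (int i = 0; i < 8; i++)
;;     sad += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];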
(define_expand "mmx_psadbw"
  [(set (match_operand:V1DI 0 "register_operand")
        (unspec:V1DI [(match_operand:V8QI 1 "register_mmxmem_operand")
                      (match_operand:V8QI 2 "register_mmxmem_operand")]
                     UNSPEC_PSADBW))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)"
  "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")

(define_insn "*mmx_psadbw"
  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yw")
        (unspec:V1DI [(match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")
                      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")]
                     UNSPEC_PSADBW))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)
   && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
  "@
   psadbw\t{%2, %0|%0, %2}
   psadbw\t{%2, %0|%0, %2}
   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,sse2_noavx,avx")
   (set_attr "mmx_isa" "native,*,*")
   (set_attr "type" "mmxshft,sseiadd,sseiadd")
   (set_attr "mode" "DI,TI,TI")])

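;; Horizontal add of the eight byte elements, done by computing the
;; sum of absolute differences against an all-zero vector and reading
;; the low byte of the 64-bit psadbw result.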
(define_expand "reduc_plus_scal_v8qi"
  [(plus:V8QI
     (match_operand:QI 0 "register_operand")
     (match_operand:V8QI 1 "register_operand"))]
  "TARGET_MMX_WITH_SSE"
{
  rtx tmp = gen_reg_rtx (V8QImode);
  emit_move_insn (tmp, CONST0_RTX (V8QImode));
  rtx tmp2 = gen_reg_rtx (V1DImode);
  emit_insn (gen_mmx_psadbw (tmp2, operands[1], tmp));
  tmp2 = gen_lowpart (V8QImode, tmp2);
  emit_insn (gen_vec_extractv8qiqi (operands[0], tmp2, const0_rtx));
  DONE;
})

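;; The remaining V4HI/V4QI reductions use ix86_expand_reduc to perform
;; a log2 sequence of shuffles and vector operations, then read the
;; scalar result out of element 0.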
(define_expand "reduc_plus_scal_v4hi"
  [(plus:V4HI
     (match_operand:HI 0 "register_operand")
     (match_operand:V4HI 1 "register_operand"))]
  "TARGET_MMX_WITH_SSE"
{
  rtx tmp = gen_reg_rtx (V4HImode);
  ix86_expand_reduc (gen_addv4hi3, tmp, operands[1]);
  emit_insn (gen_vec_extractv4hihi (operands[0], tmp, const0_rtx));
  DONE;
})

(define_expand "reduc_<code>_scal_v4hi"
  [(smaxmin:V4HI
     (match_operand:HI 0 "register_operand")
     (match_operand:V4HI 1 "register_operand"))]
  "TARGET_MMX_WITH_SSE"
{
  rtx tmp = gen_reg_rtx (V4HImode);
  ix86_expand_reduc (gen_<code>v4hi3, tmp, operands[1]);
  emit_insn (gen_vec_extractv4hihi (operands[0], tmp, const0_rtx));
  DONE;
})

(define_expand "reduc_<code>_scal_v4qi"
  [(smaxmin:V4QI
     (match_operand:QI 0 "register_operand")
     (match_operand:V4QI 1 "register_operand"))]
  "TARGET_SSE4_1"
{
  rtx tmp = gen_reg_rtx (V4QImode);
  ix86_expand_reduc (gen_<code>v4qi3, tmp, operands[1]);
  emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx));
  DONE;
})

(define_expand "reduc_<code>_scal_v4hi"
  [(umaxmin:V4HI
     (match_operand:HI 0 "register_operand")
     (match_operand:V4HI 1 "register_operand"))]
  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
{
  rtx tmp = gen_reg_rtx (V4HImode);
  ix86_expand_reduc (gen_<code>v4hi3, tmp, operands[1]);
  emit_insn (gen_vec_extractv4hihi (operands[0], tmp, const0_rtx));
  DONE;
})

(define_expand "reduc_<code>_scal_v4qi"
  [(umaxmin:V4QI
     (match_operand:QI 0 "register_operand")
     (match_operand:V4QI 1 "register_operand"))]
  "TARGET_SSE4_1"
{
  rtx tmp = gen_reg_rtx (V4QImode);
  ix86_expand_reduc (gen_<code>v4qi3, tmp, operands[1]);
  emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx));
  DONE;
})

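;; Byte-add reduction of a 4-byte vector: the V4QI input is placed in
;; the low element of a zeroed V4SI in an XMM register, and an SSE2
;; psadbw against zero then sums the four bytes; the result is read
;; from byte 0.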
(define_expand "reduc_plus_scal_v4qi"
  [(plus:V4QI
     (match_operand:QI 0 "register_operand")
     (match_operand:V4QI 1 "register_operand"))]
  "TARGET_SSE2"
{
  rtx op1 = gen_reg_rtx (V16QImode);
  emit_insn (gen_vec_setv4si_0 (lowpart_subreg (V4SImode, op1, V16QImode),
                                CONST0_RTX (V4SImode),
                                lowpart_subreg (SImode,
                                                operands[1],
                                                V4QImode)));
  rtx tmp = gen_reg_rtx (V16QImode);
  emit_move_insn (tmp, CONST0_RTX (V16QImode));
  rtx tmp2 = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_psadbw (tmp2, op1, tmp));
  tmp2 = gen_lowpart (V16QImode, tmp2);
  emit_insn (gen_vec_extractv16qiqi (operands[0], tmp2, const0_rtx));
  DONE;
})

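;; usadv8qi: unsigned sum of absolute differences with accumulation.
;; psadbw produces the 64-bit SAD of the two V8QI inputs, which is
;; then viewed as V2SI and added to the accumulator in operand 3.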
(define_expand "usadv8qi"
  [(match_operand:V2SI 0 "register_operand")
   (match_operand:V8QI 1 "register_operand")
   (match_operand:V8QI 2 "register_operand")
   (match_operand:V2SI 3 "register_operand")]
  "TARGET_MMX_WITH_SSE"
{
  rtx t1 = gen_reg_rtx (V1DImode);
  rtx t2 = gen_reg_rtx (V2SImode);
  emit_insn (gen_mmx_psadbw (t1, operands[1], operands[2]));
  convert_move (t2, t1, 0);
  emit_insn (gen_addv2si3 (operands[0], t2, operands[3]));
  DONE;
})

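;; pmovmskb collects the most significant bit of each byte of the
;; input into the low bits of a scalar register.  When the input ends
;; up in an SSE register, the insn is split after reload into the
;; 128-bit pmovmskb followed by a zero extension that keeps only the
;; eight mask bits corresponding to the V8QI input.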
(define_insn_and_split "mmx_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
                   UNSPEC_MOVMSK))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
   && (TARGET_SSE || TARGET_3DNOW_A)"
  "@
   pmovmskb\t{%1, %0|%0, %1}
   #"
  "TARGET_SSE2 && reload_completed
   && SSE_REGNO_P (REGNO (operands[1]))"
  [(set (match_dup 0)
        (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
   (set (match_dup 0)
        (zero_extend:SI (match_dup 2)))]
{
  /* Generate SSE pmovmskb and zero-extend from QImode to SImode.  */
  operands[1] = lowpart_subreg (V16QImode, operands[1],
                                GET_MODE (operands[1]));
  operands[2] = lowpart_subreg (QImode, operands[0],
                                GET_MODE (operands[0]));
}
  [(set_attr "mmx_isa" "native,sse")
   (set_attr "type" "mmxcvt,ssemov")
   (set_attr "mode" "DI,TI")])

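;; maskmovq performs a byte-masked store: each byte of operand 1 is
;; written to memory only if the most significant bit of the
;; corresponding byte of operand 2 is set.  The store address is
;; implicitly %edi/%rdi, hence the "D" constraint on the address
;; register.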
(define_expand "mmx_maskmovq"
  [(set (match_operand:V8QI 0 "memory_operand")
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
                      (match_operand:V8QI 2 "register_operand")
                      (match_dup 0)]
                     UNSPEC_MASKMOV))]
  "TARGET_SSE || TARGET_3DNOW_A")

(define_insn "*mmx_maskmovq"
  [(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
                      (match_operand:V8QI 2 "register_operand" "y")
                      (mem:V8QI (match_dup 0))]
                     UNSPEC_MASKMOV))]
  "TARGET_SSE || TARGET_3DNOW_A"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovq\t{%2, %1|%1, %2}"
  [(set_attr "type" "mmxcvt")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "DI")])

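;; emms (and the faster 3dNOW! femms) clears the x87 tag word so the
;; FP stack can be used again after MMX code.  Both are modeled as
;; clobbering every x87 and MMX register.  When MMX code is emulated
;; with SSE registers (!TARGET_MMX), nothing needs to be cleared and
;; the expander emits a plain nop instead.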
(define_int_iterator EMMS
  [(UNSPECV_EMMS "TARGET_MMX")
   (UNSPECV_FEMMS "TARGET_3DNOW")])

(define_int_attr emms
  [(UNSPECV_EMMS "emms")
   (UNSPECV_FEMMS "femms")])

(define_expand "mmx_<emms>"
  [(parallel
    [(unspec_volatile [(const_int 0)] EMMS)
     (clobber (reg:XF ST0_REG))
     (clobber (reg:XF ST1_REG))
     (clobber (reg:XF ST2_REG))
     (clobber (reg:XF ST3_REG))
     (clobber (reg:XF ST4_REG))
     (clobber (reg:XF ST5_REG))
     (clobber (reg:XF ST6_REG))
     (clobber (reg:XF ST7_REG))
     (clobber (reg:DI MM0_REG))
     (clobber (reg:DI MM1_REG))
     (clobber (reg:DI MM2_REG))
     (clobber (reg:DI MM3_REG))
     (clobber (reg:DI MM4_REG))
     (clobber (reg:DI MM5_REG))
     (clobber (reg:DI MM6_REG))
     (clobber (reg:DI MM7_REG))])]
  "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
  if (!TARGET_MMX)
    {
      emit_insn (gen_nop ());
      DONE;
    }
})

(define_insn "*mmx_<emms>"
  [(unspec_volatile [(const_int 0)] EMMS)
   (clobber (reg:XF ST0_REG))
   (clobber (reg:XF ST1_REG))
   (clobber (reg:XF ST2_REG))
   (clobber (reg:XF ST3_REG))
   (clobber (reg:XF ST4_REG))
   (clobber (reg:XF ST5_REG))
   (clobber (reg:XF ST6_REG))
   (clobber (reg:XF ST7_REG))
   (clobber (reg:DI MM0_REG))
   (clobber (reg:DI MM1_REG))
   (clobber (reg:DI MM2_REG))
   (clobber (reg:DI MM3_REG))
   (clobber (reg:DI MM4_REG))
   (clobber (reg:DI MM5_REG))
   (clobber (reg:DI MM6_REG))
   (clobber (reg:DI MM7_REG))]
  ""
  "<emms>"
  [(set_attr "type" "mmx")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")])