]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/mmx.md
i386: Emulate MMX umulv1siv1di3 with SSE2
[thirdparty/gcc.git] / gcc / config / i386 / mmx.md
1 ;; GCC machine description for MMX and 3dNOW! instructions
2 ;; Copyright (C) 2005-2019 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 ;; The MMX and 3dNOW! patterns are in the same file because they use
21 ;; the same register file, and 3dNOW! adds a number of extensions to
22 ;; the base integer MMX isa.
23
24 ;; Note! Except for the basic move instructions, *all* of these
25 ;; patterns are outside the normal optabs namespace. This is because
26 ;; use of these registers requires the insertion of emms or femms
27 ;; instructions to return to normal fpu mode. The compiler doesn't
28 ;; know how to do that itself, which means it's up to the user. Which
29 ;; means that we should never use any of these patterns except at the
30 ;; direction of the user via a builtin.
31
;; UNSPEC codes used by patterns in this file: the non-temporal store
;; (sse_movntq) and the 3DNow! reciprocal / reciprocal-square-root
;; estimate and iteration insns (mmx_rcp*, mmx_rsq*).
32 (define_c_enum "unspec" [
33 UNSPEC_MOVNTQ
34 UNSPEC_PFRCP
35 UNSPEC_PFRCPIT1
36 UNSPEC_PFRCPIT2
37 UNSPEC_PFRSQRT
38 UNSPEC_PFRSQIT1
39 ])
40
;; Volatile-unspec codes for this file.
;; NOTE(review): the names suggest the emms/femms instructions mentioned
;; in the header comment; the patterns that use them are not in this
;; part of the file -- confirm there.
41 (define_c_enum "unspecv" [
42 UNSPECV_EMMS
43 UNSPECV_FEMMS
44 ])
45
46 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
;; MMXMODEI8 additionally includes V1DI, but only when TARGET_SSE2 holds.
47 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
48 (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
49
50 ;; All 8-byte vector modes handled by MMX
;; (the integer modes plus V2SF; used by the move patterns below).
51 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
52
53 ;; Mix-n-match
;; The digits in each name are the element sizes in bytes that the
;; iterator covers (1 = QI, 2 = HI, 4 = SI, 8 = DI).
54 (define_mode_iterator MMXMODE12 [V8QI V4HI])
55 (define_mode_iterator MMXMODE24 [V4HI V2SI])
56 (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
57
58 ;; Mapping from integer vector mode to mnemonic suffix
;; (b/w/d/q), spliced into templates such as p<plusminus_mnemonic><mmxvecsize>.
59 (define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
60
61 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
62 ;;
63 ;; Move patterns
64 ;;
65 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
66
67 ;; All of these patterns are enabled for MMX as well as 3dNOW.
68 ;; This is essential for maintaining stable calling conventions.
69
;; Move expander for every mode in MMXMODE.  All of the work is handed to
;; ix86_expand_vector_move; because the body ends in DONE, the RTL
;; template itself is never emitted.
70 (define_expand "mov<mode>"
71 [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
72 (match_operand:MMXMODE 1 "nonimmediate_operand"))]
73 "TARGET_MMX"
74 {
75 ix86_expand_vector_move (<MODE>mode, operands);
76 DONE;
77 })
78
;; Move insn for all MMXMODE modes.  Its 19 alternatives (numbered from 0)
;; cover general registers, memory, MMX registers ("y") and SSE registers
;; ("v" / "x"), plus a zero source ("C"; operand 1 is nonimm_or_0_operand).
;; The output code dispatches on the "type" attribute computed below:
;;   alternatives 0-1    multi   -> "#"
;;   alternatives 2-4    imov    -> mov{l} / mov{q}
;;   alternative  5      mmx     -> pxor of the destination with itself
;;   alternatives 6-10   mmxmov  -> movq (movd for old assemblers)
;;   alternative  11     sselog1 -> standard_sse_constant_opcode
;;   alternatives 12-16  ssemov  -> chosen by the "mode" attribute
;;   alternatives 17-18  ssecvt  -> movq2dq / movdq2q
;; The insn condition rejects memory-to-memory moves.
79 (define_insn "*mov<mode>_internal"
80 [(set (match_operand:MMXMODE 0 "nonimmediate_operand"
81 "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
82 (match_operand:MMXMODE 1 "nonimm_or_0_operand"
83 "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))]
84 "TARGET_MMX
85 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
86 {
87 switch (get_attr_type (insn))
88 {
89 case TYPE_MULTI:
90 return "#";
91
92 case TYPE_IMOV:
93 if (get_attr_mode (insn) == MODE_SI)
94 return "mov{l}\t{%1, %k0|%k0, %1}";
95 else
96 return "mov{q}\t{%1, %0|%0, %1}";
97
98 case TYPE_MMX:
/* Alternative 5: operand 1 is the constant "C"; clear the MMX register.  */
99 return "pxor\t%0, %0";
100
101 case TYPE_MMXMOV:
102 /* Handle broken assemblers that require movd instead of movq. */
103 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
104 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
105 return "movd\t{%1, %0|%0, %1}";
106 return "movq\t{%1, %0|%0, %1}";
107
108 case TYPE_SSECVT:
/* Alternatives 17-18: direct move between an MMX and an SSE register.  */
109 if (SSE_REG_P (operands[0]))
110 return "movq2dq\t{%1, %0|%0, %1}";
111 else
112 return "movdq2q\t{%1, %0|%0, %1}";
113
114 case TYPE_SSELOG1:
115 return standard_sse_constant_opcode (insn, operands);
116
117 case TYPE_SSEMOV:
/* The mnemonic depends on the "mode" attribute computed below.  */
118 switch (get_attr_mode (insn))
119 {
120 case MODE_DI:
121 /* Handle broken assemblers that require movd instead of movq. */
122 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
123 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
124 return "%vmovd\t{%1, %0|%0, %1}";
125 return "%vmovq\t{%1, %0|%0, %1}";
126 case MODE_TI:
127 return "%vmovdqa\t{%1, %0|%0, %1}";
128 case MODE_XI:
129 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
130
131 case MODE_V2SF:
/* With AVX and a register destination, use the three-operand form.  */
132 if (TARGET_AVX && REG_P (operands[0]))
133 return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
134 return "%vmovlps\t{%1, %0|%0, %1}";
135 case MODE_V4SF:
136 return "%vmovaps\t{%1, %0|%0, %1}";
137
138 default:
139 gcc_unreachable ();
140 }
141
142 default:
143 gcc_unreachable ();
144 }
145 }
;; "isa": which alternatives are available for which target configuration.
146 [(set (attr "isa")
147 (cond [(eq_attr "alternative" "0,1")
148 (const_string "nox64")
149 (eq_attr "alternative" "2,3,4,9,10")
150 (const_string "x64")
151 (eq_attr "alternative" "15,16")
152 (const_string "x64_sse2")
153 (eq_attr "alternative" "17,18")
154 (const_string "sse2")
155 ]
156 (const_string "*")))
;; "type": drives the switch in the output code above; alternatives not
;; listed here (12-16) default to ssemov.
157 (set (attr "type")
158 (cond [(eq_attr "alternative" "0,1")
159 (const_string "multi")
160 (eq_attr "alternative" "2,3,4")
161 (const_string "imov")
162 (eq_attr "alternative" "5")
163 (const_string "mmx")
164 (eq_attr "alternative" "6,7,8,9,10")
165 (const_string "mmxmov")
166 (eq_attr "alternative" "11")
167 (const_string "sselog1")
168 (eq_attr "alternative" "17,18")
169 (const_string "ssecvt")
170 ]
171 (const_string "ssemov")))
;; Alternatives 9, 10, 15 and 16 move between a general register and an
;; MMX/SSE register.
172 (set (attr "prefix_rex")
173 (if_then_else (eq_attr "alternative" "9,10,15,16")
174 (const_string "1")
175 (const_string "*")))
176 (set (attr "prefix")
177 (if_then_else (eq_attr "type" "sselog1,ssemov")
178 (const_string "maybe_vex")
179 (const_string "orig")))
180 (set (attr "prefix_data16")
181 (if_then_else
182 (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
183 (const_string "1")
184 (const_string "*")))
;; "mode": SI for alternative 2; for the SSE zero/register-register
;; alternatives (11, 12) one of XI, V4SF or TI depending on the registers
;; involved and the tuning flags; V2SF for SSE loads/stores (13, 14) of
;; V2SF values or when SSE2 is unavailable; DI otherwise.
185 (set (attr "mode")
186 (cond [(eq_attr "alternative" "2")
187 (const_string "SI")
188 (eq_attr "alternative" "11,12")
189 (cond [(ior (match_operand 0 "ext_sse_reg_operand")
190 (match_operand 1 "ext_sse_reg_operand"))
191 (const_string "XI")
192 (match_test "<MODE>mode == V2SFmode")
193 (const_string "V4SF")
194 (ior (not (match_test "TARGET_SSE2"))
195 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
196 (const_string "V4SF")
197 (match_test "TARGET_AVX")
198 (const_string "TI")
199 (match_test "optimize_function_for_size_p (cfun)")
200 (const_string "V4SF")
201 ]
202 (const_string "TI"))
203
204 (and (eq_attr "alternative" "13,14")
205 (ior (match_test "<MODE>mode == V2SFmode")
206 (not (match_test "TARGET_SSE2"))))
207 (const_string "V2SF")
208 ]
209 (const_string "DI")))
;; Inter-unit moves are preferred only when the tuning flag for that
;; direction allows them: 9 and 15 move from a vector register to a general
;; register, 10 and 16 the other way.
210 (set (attr "preferred_for_speed")
211 (cond [(eq_attr "alternative" "9,15")
212 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
213 (eq_attr "alternative" "10,16")
214 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
215 ]
216 (symbol_ref "true")))])
217
;; On non-64-bit targets, once reload has completed, split an 8-byte
;; vector move whose operands both satisfy nonimmediate_gr_operand.
;; ix86_split_long_move emits the replacement insns; the (const_int 0)
;; template is a placeholder because the body ends in DONE.
218 (define_split
219 [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
220 (match_operand:MMXMODE 1 "nonimmediate_gr_operand"))]
221 "!TARGET_64BIT && reload_completed"
222 [(const_int 0)]
223 "ix86_split_long_move (operands); DONE;")
224
;; Same split as above for a zero source (const0_operand): handled by
;; ix86_split_long_move on non-64-bit targets after reload.
225 (define_split
226 [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
227 (match_operand:MMXMODE 1 "const0_operand"))]
228 "!TARGET_64BIT && reload_completed"
229 [(const_int 0)]
230 "ix86_split_long_move (operands); DONE;")
231
;; Misaligned-move expander for MMXMODE.  It is expanded exactly like
;; mov<mode>: everything is delegated to ix86_expand_vector_move.
232 (define_expand "movmisalign<mode>"
233 [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
234 (match_operand:MMXMODE 1 "nonimmediate_operand"))]
235 "TARGET_MMX"
236 {
237 ix86_expand_vector_move (<MODE>mode, operands);
238 DONE;
239 })
240
;; DImode store modelled as UNSPEC_MOVNTQ.  Alternative 0 stores an MMX
;; register with movntq (mmx_isa "native"); alternative 1 stores a general
;; register with movnti (mmx_isa "x64").
241 (define_insn "sse_movntq"
242 [(set (match_operand:DI 0 "memory_operand" "=m,m")
243 (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
244 UNSPEC_MOVNTQ))]
245 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
246 && (TARGET_SSE || TARGET_3DNOW_A)"
247 "@
248 movntq\t{%1, %0|%0, %1}
249 movnti\t{%1, %0|%0, %1}"
250 [(set_attr "mmx_isa" "native,x64")
251 (set_attr "type" "mmxmov,ssemov")
252 (set_attr "mode" "DI")])
253
254 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
255 ;;
256 ;; Parallel single-precision floating point arithmetic
257 ;;
258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
259
;; Expander for V2SF addition (3DNow!).  Either input may be in memory
;; here; ix86_fixup_binary_operands_no_copy adjusts the operands so that
;; the insn below can match.
260 (define_expand "mmx_addv2sf3"
261 [(set (match_operand:V2SF 0 "register_operand")
262 (plus:V2SF
263 (match_operand:V2SF 1 "nonimmediate_operand")
264 (match_operand:V2SF 2 "nonimmediate_operand")))]
265 "TARGET_3DNOW"
266 "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
267
;; V2SF addition, emitted as pfadd.  Operand 1 is tied to the destination
;; and marked commutative ("%0"); operand 2 is an MMX register or memory.
268 (define_insn "*mmx_addv2sf3"
269 [(set (match_operand:V2SF 0 "register_operand" "=y")
270 (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
271 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
272 "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
273 "pfadd\t{%2, %0|%0, %2}"
274 [(set_attr "type" "mmxadd")
275 (set_attr "prefix_extra" "1")
276 (set_attr "mode" "V2SF")])
277
;; Expander for V2SF subtraction: operand 0 = operand 1 - operand 2.
;; There is no preparation code; the RTL is emitted as written.
278 (define_expand "mmx_subv2sf3"
279 [(set (match_operand:V2SF 0 "register_operand")
280 (minus:V2SF (match_operand:V2SF 1 "register_operand")
281 (match_operand:V2SF 2 "nonimmediate_operand")))]
282 "TARGET_3DNOW")
283
;; Expander for reversed V2SF subtraction: operand 0 = operand 2 - operand 1.
;; Note the swapped operand numbers inside the minus.
284 (define_expand "mmx_subrv2sf3"
285 [(set (match_operand:V2SF 0 "register_operand")
286 (minus:V2SF (match_operand:V2SF 2 "register_operand")
287 (match_operand:V2SF 1 "nonimmediate_operand")))]
288 "TARGET_3DNOW")
289
;; V2SF subtraction.  Alternative 0 ties operand 1 to the destination and
;; emits pfsub; alternative 1 ties operand 2 to the destination and emits
;; the reversed form pfsubr, so either input (but not both) may come from
;; memory.
;; Both inputs are nonimmediate_operand, so the insn condition has to
;; reject the case where operands 1 and 2 are both in memory: no
;; alternative can satisfy it.  Operand 0 is a register_operand, so a
;; MEM_P test on it would never reject anything.
290 (define_insn "*mmx_subv2sf3"
291 [(set (match_operand:V2SF 0 "register_operand" "=y,y")
292 (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
293 (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
294 "TARGET_3DNOW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
295 "@
296 pfsub\t{%2, %0|%0, %2}
297 pfsubr\t{%1, %0|%0, %1}"
298 [(set_attr "type" "mmxadd")
299 (set_attr "prefix_extra" "1")
300 (set_attr "mode" "V2SF")])
301
;; Expander for V2SF multiplication (3DNow!); the operands are adjusted
;; by ix86_fixup_binary_operands_no_copy.
302 (define_expand "mmx_mulv2sf3"
303 [(set (match_operand:V2SF 0 "register_operand")
304 (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
305 (match_operand:V2SF 2 "nonimmediate_operand")))]
306 "TARGET_3DNOW"
307 "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
308
;; V2SF multiplication, emitted as pfmul.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
309 (define_insn "*mmx_mulv2sf3"
310 [(set (match_operand:V2SF 0 "register_operand" "=y")
311 (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
312 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
313 "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
314 "pfmul\t{%2, %0|%0, %2}"
315 [(set_attr "type" "mmxmul")
316 (set_attr "prefix_extra" "1")
317 (set_attr "mode" "V2SF")])
318
;; Expander for V2SF smax / smin (the smaxmin code iterator).
;; When !flag_finite_math_only or flag_signed_zeros holds, operand 1 is
;; forced into a register and the non-commutative mmx_ieee_* pattern is
;; emitted instead.  Otherwise only the operands are fixed up and the
;; template below is emitted, to be matched by the commutative insn.
319 (define_expand "mmx_<code>v2sf3"
320 [(set (match_operand:V2SF 0 "register_operand")
321 (smaxmin:V2SF
322 (match_operand:V2SF 1 "nonimmediate_operand")
323 (match_operand:V2SF 2 "nonimmediate_operand")))]
324 "TARGET_3DNOW"
325 {
326 if (!flag_finite_math_only || flag_signed_zeros)
327 {
328 operands[1] = force_reg (V2SFmode, operands[1]);
329 emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
330 (operands[0], operands[1], operands[2]));
331 DONE;
332 }
333 else
334 ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
335 })
336
337 ;; These versions of the min/max patterns are intentionally ignorant of
338 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
339 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
340 ;; are undefined in this condition, we're certain this is correct.
341
;; Commutative V2SF max/min, emitted as pf<maxmin_float>.  Operand 1 is
;; tied to the destination with the commutative mark ("%0").
342 (define_insn "*mmx_<code>v2sf3"
343 [(set (match_operand:V2SF 0 "register_operand" "=y")
344 (smaxmin:V2SF
345 (match_operand:V2SF 1 "nonimmediate_operand" "%0")
346 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
347 "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
348 "pf<maxmin_float>\t{%2, %0|%0, %2}"
349 [(set_attr "type" "mmxadd")
350 (set_attr "prefix_extra" "1")
351 (set_attr "mode" "V2SF")])
352
353 ;; These versions of the min/max patterns implement exactly the operations
354 ;; min = (op1 < op2 ? op1 : op2)
355 ;; max = (!(op1 < op2) ? op1 : op2)
356 ;; Their operands are not commutative, and thus they may be used in the
357 ;; presence of -0.0 and NaN.
358
;; Non-commutative V2SF max/min, modelled as an unspec (IEEE_MAXMIN) so
;; the operand order is kept.  Operand 1 must be a register tied to the
;; destination (plain "0", no commutative mark).
359 (define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
360 [(set (match_operand:V2SF 0 "register_operand" "=y")
361 (unspec:V2SF
362 [(match_operand:V2SF 1 "register_operand" "0")
363 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
364 IEEE_MAXMIN))]
365 "TARGET_3DNOW"
366 "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
367 [(set_attr "type" "mmxadd")
368 (set_attr "prefix_extra" "1")
369 (set_attr "mode" "V2SF")])
370
;; pfrcp, modelled as UNSPEC_PFRCP of a single V2SF operand (MMX register
;; or memory); the result goes to an MMX register.
371 (define_insn "mmx_rcpv2sf2"
372 [(set (match_operand:V2SF 0 "register_operand" "=y")
373 (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
374 UNSPEC_PFRCP))]
375 "TARGET_3DNOW"
376 "pfrcp\t{%1, %0|%0, %1}"
377 [(set_attr "type" "mmx")
378 (set_attr "prefix_extra" "1")
379 (set_attr "mode" "V2SF")])
380
;; pfrcpit1, modelled as UNSPEC_PFRCPIT1 of two V2SF operands.  Operand 1
;; is tied to the destination; operand 2 is an MMX register or memory.
381 (define_insn "mmx_rcpit1v2sf3"
382 [(set (match_operand:V2SF 0 "register_operand" "=y")
383 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
384 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
385 UNSPEC_PFRCPIT1))]
386 "TARGET_3DNOW"
387 "pfrcpit1\t{%2, %0|%0, %2}"
388 [(set_attr "type" "mmx")
389 (set_attr "prefix_extra" "1")
390 (set_attr "mode" "V2SF")])
391
;; pfrcpit2, modelled as UNSPEC_PFRCPIT2 of two V2SF operands.  Operand 1
;; is tied to the destination; operand 2 is an MMX register or memory.
392 (define_insn "mmx_rcpit2v2sf3"
393 [(set (match_operand:V2SF 0 "register_operand" "=y")
394 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
395 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
396 UNSPEC_PFRCPIT2))]
397 "TARGET_3DNOW"
398 "pfrcpit2\t{%2, %0|%0, %2}"
399 [(set_attr "type" "mmx")
400 (set_attr "prefix_extra" "1")
401 (set_attr "mode" "V2SF")])
402
;; pfrsqrt, modelled as UNSPEC_PFRSQRT of a single V2SF operand (MMX
;; register or memory); the result goes to an MMX register.
403 (define_insn "mmx_rsqrtv2sf2"
404 [(set (match_operand:V2SF 0 "register_operand" "=y")
405 (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
406 UNSPEC_PFRSQRT))]
407 "TARGET_3DNOW"
408 "pfrsqrt\t{%1, %0|%0, %1}"
409 [(set_attr "type" "mmx")
410 (set_attr "prefix_extra" "1")
411 (set_attr "mode" "V2SF")])
412
;; pfrsqit1, modelled as UNSPEC_PFRSQIT1 of two V2SF operands.  Operand 1
;; is tied to the destination; operand 2 is an MMX register or memory.
413 (define_insn "mmx_rsqit1v2sf3"
414 [(set (match_operand:V2SF 0 "register_operand" "=y")
415 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
416 (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
417 UNSPEC_PFRSQIT1))]
418 "TARGET_3DNOW"
419 "pfrsqit1\t{%2, %0|%0, %2}"
420 [(set_attr "type" "mmx")
421 (set_attr "prefix_extra" "1")
422 (set_attr "mode" "V2SF")])
423
;; Horizontal add, emitted as pfacc:
;;   result[0] = op1[0] + op1[1]
;;   result[1] = op2[0] + op2[1]
;; Operand 1 is tied to the destination.
424 (define_insn "mmx_haddv2sf3"
425 [(set (match_operand:V2SF 0 "register_operand" "=y")
426 (vec_concat:V2SF
427 (plus:SF
428 (vec_select:SF
429 (match_operand:V2SF 1 "register_operand" "0")
430 (parallel [(const_int 0)]))
431 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
432 (plus:SF
433 (vec_select:SF
434 (match_operand:V2SF 2 "nonimmediate_operand" "ym")
435 (parallel [(const_int 0)]))
436 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
437 "TARGET_3DNOW"
438 "pfacc\t{%2, %0|%0, %2}"
439 [(set_attr "type" "mmxadd")
440 (set_attr "prefix_extra" "1")
441 (set_attr "mode" "V2SF")])
442
;; Horizontal subtract, emitted as pfnacc (requires TARGET_3DNOW_A):
;;   result[0] = op1[0] - op1[1]
;;   result[1] = op2[0] - op2[1]
;; Operand 1 is tied to the destination.
443 (define_insn "mmx_hsubv2sf3"
444 [(set (match_operand:V2SF 0 "register_operand" "=y")
445 (vec_concat:V2SF
446 (minus:SF
447 (vec_select:SF
448 (match_operand:V2SF 1 "register_operand" "0")
449 (parallel [(const_int 0)]))
450 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
451 (minus:SF
452 (vec_select:SF
453 (match_operand:V2SF 2 "nonimmediate_operand" "ym")
454 (parallel [(const_int 0)]))
455 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
456 "TARGET_3DNOW_A"
457 "pfnacc\t{%2, %0|%0, %2}"
458 [(set_attr "type" "mmxadd")
459 (set_attr "prefix_extra" "1")
460 (set_attr "mode" "V2SF")])
461
;; pfpnacc (requires TARGET_3DNOW_A), described here as a vec_merge of
;; (op1 + op2) and (op1 - op2) under the mask (const_int 1).
;; NOTE(review): check this RTL against the documented behaviour of
;; pfpnacc before relying on it for optimisation.
462 (define_insn "mmx_addsubv2sf3"
463 [(set (match_operand:V2SF 0 "register_operand" "=y")
464 (vec_merge:V2SF
465 (plus:V2SF
466 (match_operand:V2SF 1 "register_operand" "0")
467 (match_operand:V2SF 2 "nonimmediate_operand" "ym"))
468 (minus:V2SF (match_dup 1) (match_dup 2))
469 (const_int 1)))]
470 "TARGET_3DNOW_A"
471 "pfpnacc\t{%2, %0|%0, %2}"
472 [(set_attr "type" "mmxadd")
473 (set_attr "prefix_extra" "1")
474 (set_attr "mode" "V2SF")])
475
476 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
477 ;;
478 ;; Parallel single-precision floating point comparisons
479 ;;
480 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
481
;; Expander for the V2SF equality comparison; the result is a V2SI
;; vector.  The operands are adjusted by ix86_fixup_binary_operands_no_copy.
482 (define_expand "mmx_eqv2sf3"
483 [(set (match_operand:V2SI 0 "register_operand")
484 (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand")
485 (match_operand:V2SF 2 "nonimmediate_operand")))]
486 "TARGET_3DNOW"
487 "ix86_fixup_binary_operands_no_copy (EQ, V2SFmode, operands);")
488
;; V2SF equality comparison producing a V2SI result, emitted as pfcmpeq.
;; Operand 1 is tied to the destination and marked commutative ("%0").
489 (define_insn "*mmx_eqv2sf3"
490 [(set (match_operand:V2SI 0 "register_operand" "=y")
491 (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
492 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
493 "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
494 "pfcmpeq\t{%2, %0|%0, %2}"
495 [(set_attr "type" "mmxcmp")
496 (set_attr "prefix_extra" "1")
497 (set_attr "mode" "V2SF")])
498
;; V2SF greater-than comparison producing a V2SI result, emitted as
;; pfcmpgt.  Not commutative: operand 1 must be the tied register.
499 (define_insn "mmx_gtv2sf3"
500 [(set (match_operand:V2SI 0 "register_operand" "=y")
501 (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
502 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
503 "TARGET_3DNOW"
504 "pfcmpgt\t{%2, %0|%0, %2}"
505 [(set_attr "type" "mmxcmp")
506 (set_attr "prefix_extra" "1")
507 (set_attr "mode" "V2SF")])
508
;; V2SF greater-or-equal comparison producing a V2SI result, emitted as
;; pfcmpge.  Not commutative: operand 1 must be the tied register.
509 (define_insn "mmx_gev2sf3"
510 [(set (match_operand:V2SI 0 "register_operand" "=y")
511 (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
512 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
513 "TARGET_3DNOW"
514 "pfcmpge\t{%2, %0|%0, %2}"
515 [(set_attr "type" "mmxcmp")
516 (set_attr "prefix_extra" "1")
517 (set_attr "mode" "V2SF")])
518
519 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
520 ;;
521 ;; Parallel single-precision floating point conversion operations
522 ;;
523 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
524
;; Convert V2SF to V2SI (RTL "fix"), emitted as pf2id.  The source may be
;; an MMX register or memory.
525 (define_insn "mmx_pf2id"
526 [(set (match_operand:V2SI 0 "register_operand" "=y")
527 (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
528 "TARGET_3DNOW"
529 "pf2id\t{%1, %0|%0, %1}"
530 [(set_attr "type" "mmxcvt")
531 (set_attr "prefix_extra" "1")
532 (set_attr "mode" "V2SF")])
533
;; pf2iw (requires TARGET_3DNOW_A): convert V2SF to integer, truncate
;; each element to 16 bits with signed saturation (ss_truncate), then
;; sign-extend back to V2SI.
534 (define_insn "mmx_pf2iw"
535 [(set (match_operand:V2SI 0 "register_operand" "=y")
536 (sign_extend:V2SI
537 (ss_truncate:V2HI
538 (fix:V2SI
539 (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
540 "TARGET_3DNOW_A"
541 "pf2iw\t{%1, %0|%0, %1}"
542 [(set_attr "type" "mmxcvt")
543 (set_attr "prefix_extra" "1")
544 (set_attr "mode" "V2SF")])
545
;; pi2fw (requires TARGET_3DNOW_A): take the low 16 bits of each V2SI
;; element (truncate), sign-extend them and convert the result to V2SF.
546 (define_insn "mmx_pi2fw"
547 [(set (match_operand:V2SF 0 "register_operand" "=y")
548 (float:V2SF
549 (sign_extend:V2SI
550 (truncate:V2HI
551 (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))]
552 "TARGET_3DNOW_A"
553 "pi2fw\t{%1, %0|%0, %1}"
554 [(set_attr "type" "mmxcvt")
555 (set_attr "prefix_extra" "1")
556 (set_attr "mode" "V2SF")])
557
;; Convert V2SI to V2SF (RTL "float"), emitted as pi2fd.
558 (define_insn "mmx_floatv2si2"
559 [(set (match_operand:V2SF 0 "register_operand" "=y")
560 (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
561 "TARGET_3DNOW"
562 "pi2fd\t{%1, %0|%0, %1}"
563 [(set_attr "type" "mmxcvt")
564 (set_attr "prefix_extra" "1")
565 (set_attr "mode" "V2SF")])
566
567 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
568 ;;
569 ;; Parallel single-precision floating point element swizzling
570 ;;
571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
572
;; Swap the two elements of a V2SF value (vec_select with indices 1, 0),
;; emitted as pswapd; requires TARGET_3DNOW_A.
573 (define_insn "mmx_pswapdv2sf2"
574 [(set (match_operand:V2SF 0 "register_operand" "=y")
575 (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
576 (parallel [(const_int 1) (const_int 0)])))]
577 "TARGET_3DNOW_A"
578 "pswapd\t{%1, %0|%0, %1}"
579 [(set_attr "type" "mmxcvt")
580 (set_attr "prefix_extra" "1")
581 (set_attr "mode" "V2SF")])
582
;; Duplicate an SF value into both elements of a V2SF.  The scalar is tied
;; to the destination MMX register ("0"), and punpckldq of that register
;; with itself produces the duplicate.
583 (define_insn "*vec_dupv2sf"
584 [(set (match_operand:V2SF 0 "register_operand" "=y")
585 (vec_duplicate:V2SF
586 (match_operand:SF 1 "register_operand" "0")))]
587 "TARGET_MMX"
588 "punpckldq\t%0, %0"
589 [(set_attr "type" "mmxcvt")
590 (set_attr "mode" "DI")])
591
;; Build a V2SF from two SF values; only enabled for MMX without SSE.
;;   alternative 0: operand 1 is tied to the destination and punpckldq
;;                  brings in operand 2 (MMX register or memory);
;;   alternative 1: operand 2 is the constant "C" and movd loads operand 1
;;                  from a general register or memory.
592 (define_insn "*mmx_concatv2sf"
593 [(set (match_operand:V2SF 0 "register_operand" "=y,y")
594 (vec_concat:V2SF
595 (match_operand:SF 1 "nonimmediate_operand" " 0,rm")
596 (match_operand:SF 2 "nonimm_or_0_operand" "ym,C")))]
597 "TARGET_MMX && !TARGET_SSE"
598 "@
599 punpckldq\t{%2, %0|%0, %2}
600 movd\t{%1, %0|%0, %1}"
601 [(set_attr "type" "mmxcvt,mmxmov")
602 (set_attr "mode" "DI")])
603
;; Set one element of a V2SF vector: operand 0 is the vector, operand 1
;; the SF value and operand 2 the constant element index.  All of the
;; work is delegated to ix86_expand_vector_set.
604 (define_expand "vec_setv2sf"
605 [(match_operand:V2SF 0 "register_operand")
606 (match_operand:SF 1 "register_operand")
607 (match_operand 2 "const_int_operand")]
608 "TARGET_MMX"
609 {
610 ix86_expand_vector_set (false, operands[0], operands[1],
611 INTVAL (operands[2]));
612 DONE;
613 })
614
615 ;; Avoid combining registers from different units in a single alternative,
616 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2SF value.  The insn is always output as "#";
;; after reload it is split into a plain SFmode move whose source is the
;; low part of operand 1 (gen_lowpart).  Memory-to-memory is rejected.
617 (define_insn_and_split "*vec_extractv2sf_0"
618 [(set (match_operand:SF 0 "nonimmediate_operand" "=x, m,y ,m,f,r")
619 (vec_select:SF
620 (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
621 (parallel [(const_int 0)])))]
622 "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
623 "#"
624 "&& reload_completed"
625 [(set (match_dup 0) (match_dup 1))]
626 "operands[1] = gen_lowpart (SFmode, operands[1]);")
627
628 ;; Avoid combining registers from different units in a single alternative,
629 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the upper half) of a V2SF value.
;;   alternative 0: MMX register, punpckhdq of the tied register with itself;
;;   alternative 1: SSE3 movshdup between SSE registers;
;;   alternative 2: two-operand shufps $0xe5 (non-AVX only);
;;   alternatives 3-6: source in offsettable memory, output as "#" and
;;                  handled by the define_split that follows.
;; Two-operand shufps takes its two low result lanes from the destination
;; register, and $0xe5 puts that register's lane 1 into lane 0.  The
;; source therefore has to be tied to the destination ("0") in
;; alternative 2; an independent "x" register would never be read for the
;; extracted lane.
630 (define_insn "*vec_extractv2sf_1"
631 [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r")
632 (vec_select:SF
633 (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,0,o,o,o,o")
634 (parallel [(const_int 1)])))]
635 "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
636 "@
637 punpckhdq\t%0, %0
638 %vmovshdup\t{%1, %0|%0, %1}
639 shufps\t{$0xe5, %1, %0|%0, %1, 0xe5}
640 #
641 #
642 #
643 #"
644 [(set_attr "isa" "*,sse3,noavx,*,*,*,*")
645 (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
;; Only the shufps alternative carries an immediate byte.
646 (set (attr "length_immediate")
647 (if_then_else (eq_attr "alternative" "2")
648 (const_string "1")
649 (const_string "*")))
;; Only the movshdup alternative sets prefix_rep.
650 (set (attr "prefix_rep")
651 (if_then_else (eq_attr "alternative" "1")
652 (const_string "1")
653 (const_string "*")))
654 (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig")
655 (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")])
656
;; After reload, extracting element 1 of a V2SF held in memory becomes a
;; plain SFmode load from byte offset 4 of that memory operand.
657 (define_split
658 [(set (match_operand:SF 0 "register_operand")
659 (vec_select:SF
660 (match_operand:V2SF 1 "memory_operand")
661 (parallel [(const_int 1)])))]
662 "TARGET_MMX && reload_completed"
663 [(set (match_dup 0) (match_dup 1))]
664 "operands[1] = adjust_address (operands[1], SFmode, 4);")
665
;; Extract one SF element from a V2SF vector: operand 0 is the result,
;; operand 1 the vector and operand 2 the constant element index.  All of
;; the work is delegated to ix86_expand_vector_extract.
666 (define_expand "vec_extractv2sfsf"
667 [(match_operand:SF 0 "register_operand")
668 (match_operand:V2SF 1 "register_operand")
669 (match_operand 2 "const_int_operand")]
670 "TARGET_MMX"
671 {
672 ix86_expand_vector_extract (false, operands[0], operands[1],
673 INTVAL (operands[2]));
674 DONE;
675 })
676
;; Initialise a V2SF vector (operand 0) from operand 1 via
;; ix86_expand_vector_init.  Unlike the neighbouring V2SF expanders this
;; one is conditional on TARGET_SSE rather than TARGET_MMX.
677 (define_expand "vec_initv2sfsf"
678 [(match_operand:V2SF 0 "register_operand")
679 (match_operand 1)]
680 "TARGET_SSE"
681 {
682 ix86_expand_vector_init (false, operands[0], operands[1]);
683 DONE;
684 })
685
686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
687 ;;
688 ;; Parallel integral arithmetic
689 ;;
690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
691
;; Expander for integer vector add / subtract (plusminus iterator) over
;; the MMXMODEI8 modes.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
692 (define_expand "mmx_<plusminus_insn><mode>3"
693 [(set (match_operand:MMXMODEI8 0 "register_operand")
694 (plusminus:MMXMODEI8
695 (match_operand:MMXMODEI8 1 "register_mmxmem_operand")
696 (match_operand:MMXMODEI8 2 "register_mmxmem_operand")))]
697 "TARGET_MMX || TARGET_MMX_WITH_SSE"
698 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
699
;; Add / subtract expander without the mmx_ prefix, for the MMXMODEI
;; modes.  It is enabled only for TARGET_MMX_WITH_SSE and accepts register
;; operands only.
700 (define_expand "<plusminus_insn><mode>3"
701 [(set (match_operand:MMXMODEI 0 "register_operand")
702 (plusminus:MMXMODEI
703 (match_operand:MMXMODEI 1 "register_operand")
704 (match_operand:MMXMODEI 2 "register_operand")))]
705 "TARGET_MMX_WITH_SSE"
706 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
707
;; Integer vector add / subtract insn with three alternatives, selected
;; through the mmx_isa attribute:
;;   0: native MMX, two-operand form on "y" registers (source may be memory);
;;   1: SSE without AVX, two-operand form on "x" registers;
;;   2: AVX, three-operand vp* form on "Yv" registers.
708 (define_insn "*mmx_<plusminus_insn><mode>3"
709 [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
710 (plusminus:MMXMODEI8
711 (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,Yv")
712 (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
713 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
714 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
715 "@
716 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
717 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
718 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
719 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
720 (set_attr "type" "mmxadd,sseadd,sseadd")
721 (set_attr "mode" "DI,TI,TI")])
722
;; Expander for saturating add / subtract (sat_plusminus iterator) over
;; the V8QI and V4HI modes (MMXMODE12).
723 (define_expand "mmx_<plusminus_insn><mode>3"
724 [(set (match_operand:MMXMODE12 0 "register_operand")
725 (sat_plusminus:MMXMODE12
726 (match_operand:MMXMODE12 1 "register_mmxmem_operand")
727 (match_operand:MMXMODE12 2 "register_mmxmem_operand")))]
728 "TARGET_MMX || TARGET_MMX_WITH_SSE"
729 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
730
;; Saturating add / subtract insn for V8QI and V4HI.  It has the same
;; three alternatives as the non-saturating insn: native MMX, SSE without
;; AVX, and the three-operand AVX form.
731 (define_insn "*mmx_<plusminus_insn><mode>3"
732 [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
733 (sat_plusminus:MMXMODE12
734 (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yv")
735 (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))]
736 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
737 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
738 "@
739 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
740 p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
741 vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
742 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
743 (set_attr "type" "mmxadd,sseadd,sseadd")
744 (set_attr "mode" "DI,TI,TI")])
745
;; Expander for V4HI multiplication; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
746 (define_expand "mmx_mulv4hi3"
747 [(set (match_operand:V4HI 0 "register_operand")
748 (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
749 (match_operand:V4HI 2 "register_mmxmem_operand")))]
750 "TARGET_MMX || TARGET_MMX_WITH_SSE"
751 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
752
;; V4HI multiply expander without the mmx_ prefix.  It is enabled only for
;; TARGET_MMX_WITH_SSE and accepts register operands only.
753 (define_expand "mulv4hi3"
754 [(set (match_operand:V4HI 0 "register_operand")
755 (mult:V4HI (match_operand:V4HI 1 "register_operand")
756 (match_operand:V4HI 2 "register_operand")))]
757 "TARGET_MMX_WITH_SSE"
758 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
759
;; V4HI multiplication, emitted as pmullw (vpmullw in the AVX
;; alternative).  Alternatives: native MMX, SSE without AVX, AVX.
760 (define_insn "*mmx_mulv4hi3"
761 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
762 (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
763 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
764 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
765 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
766 "@
767 pmullw\t{%2, %0|%0, %2}
768 pmullw\t{%2, %0|%0, %2}
769 vpmullw\t{%2, %1, %0|%0, %1, %2}"
770 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
771 (set_attr "type" "mmxmul,ssemul,ssemul")
772 (set_attr "mode" "DI,TI,TI")])
773
;; Expander for the signed high-part multiply: both V4HI inputs are
;; sign-extended to V4SI and multiplied, and the upper 16 bits of each
;; product are kept (logical shift right by 16, then truncate).
774 (define_expand "mmx_smulv4hi3_highpart"
775 [(set (match_operand:V4HI 0 "register_operand")
776 (truncate:V4HI
777 (lshiftrt:V4SI
778 (mult:V4SI
779 (sign_extend:V4SI
780 (match_operand:V4HI 1 "register_mmxmem_operand"))
781 (sign_extend:V4SI
782 (match_operand:V4HI 2 "register_mmxmem_operand")))
783 (const_int 16))))]
784 "TARGET_MMX || TARGET_MMX_WITH_SSE"
785 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
786
;; Signed high-part V4HI multiply, emitted as pmulhw (vpmulhw in the AVX
;; alternative).  Alternatives: native MMX, SSE without AVX, AVX.
787 (define_insn "*mmx_smulv4hi3_highpart"
788 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
789 (truncate:V4HI
790 (lshiftrt:V4SI
791 (mult:V4SI
792 (sign_extend:V4SI
793 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
794 (sign_extend:V4SI
795 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
796 (const_int 16))))]
797 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
798 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
799 "@
800 pmulhw\t{%2, %0|%0, %2}
801 pmulhw\t{%2, %0|%0, %2}
802 vpmulhw\t{%2, %1, %0|%0, %1, %2}"
803 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
804 (set_attr "type" "mmxmul,ssemul,ssemul")
805 (set_attr "mode" "DI,TI,TI")])
806
;; Expander for the unsigned high-part multiply (inputs are
;; zero-extended).  In addition to MMX it requires TARGET_SSE or
;; TARGET_3DNOW_A.
807 (define_expand "mmx_umulv4hi3_highpart"
808 [(set (match_operand:V4HI 0 "register_operand")
809 (truncate:V4HI
810 (lshiftrt:V4SI
811 (mult:V4SI
812 (zero_extend:V4SI
813 (match_operand:V4HI 1 "register_mmxmem_operand"))
814 (zero_extend:V4SI
815 (match_operand:V4HI 2 "register_mmxmem_operand")))
816 (const_int 16))))]
817 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
818 && (TARGET_SSE || TARGET_3DNOW_A)"
819 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
820
;; Unsigned high-part V4HI multiply, emitted as pmulhuw (vpmulhuw in the
;; AVX alternative).  It has the same target requirements as its expander.
821 (define_insn "*mmx_umulv4hi3_highpart"
822 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
823 (truncate:V4HI
824 (lshiftrt:V4SI
825 (mult:V4SI
826 (zero_extend:V4SI
827 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
828 (zero_extend:V4SI
829 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
830 (const_int 16))))]
831 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
832 && (TARGET_SSE || TARGET_3DNOW_A)
833 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
834 "@
835 pmulhuw\t{%2, %0|%0, %2}
836 pmulhuw\t{%2, %0|%0, %2}
837 vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
838 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
839 (set_attr "type" "mmxmul,ssemul,ssemul")
840 (set_attr "mode" "DI,TI,TI")])
841
;; Expander for multiply-and-add of V4HI inputs into a V2SI result.
;; The even elements (0, 2) and the odd elements (1, 3) of each input are
;; sign-extended and multiplied pairwise, then the two V2SI products are
;; added:
;;   result[i] = op1[2i] * op2[2i] + op1[2i+1] * op2[2i+1]
842 (define_expand "mmx_pmaddwd"
843 [(set (match_operand:V2SI 0 "register_operand")
844 (plus:V2SI
845 (mult:V2SI
846 (sign_extend:V2SI
847 (vec_select:V2HI
848 (match_operand:V4HI 1 "register_mmxmem_operand")
849 (parallel [(const_int 0) (const_int 2)])))
850 (sign_extend:V2SI
851 (vec_select:V2HI
852 (match_operand:V4HI 2 "register_mmxmem_operand")
853 (parallel [(const_int 0) (const_int 2)]))))
854 (mult:V2SI
855 (sign_extend:V2SI
856 (vec_select:V2HI (match_dup 1)
857 (parallel [(const_int 1) (const_int 3)])))
858 (sign_extend:V2SI
859 (vec_select:V2HI (match_dup 2)
860 (parallel [(const_int 1) (const_int 3)]))))))]
861 "TARGET_MMX || TARGET_MMX_WITH_SSE"
862 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
863
;; Multiply-and-add insn matching the RTL built by mmx_pmaddwd, emitted
;; as pmaddwd (vpmaddwd in the AVX alternative).  Alternatives: native
;; MMX, SSE without AVX, AVX.
864 (define_insn "*mmx_pmaddwd"
865 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
866 (plus:V2SI
867 (mult:V2SI
868 (sign_extend:V2SI
869 (vec_select:V2HI
870 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
871 (parallel [(const_int 0) (const_int 2)])))
872 (sign_extend:V2SI
873 (vec_select:V2HI
874 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
875 (parallel [(const_int 0) (const_int 2)]))))
876 (mult:V2SI
877 (sign_extend:V2SI
878 (vec_select:V2HI (match_dup 1)
879 (parallel [(const_int 1) (const_int 3)])))
880 (sign_extend:V2SI
881 (vec_select:V2HI (match_dup 2)
882 (parallel [(const_int 1) (const_int 3)]))))))]
883 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
884 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
885 "@
886 pmaddwd\t{%2, %0|%0, %2}
887 pmaddwd\t{%2, %0|%0, %2}
888 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
889 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
890 (set_attr "type" "mmxmul,sseiadd,sseiadd")
891 (set_attr "mode" "DI,TI,TI")])
892
;; Expander for the rounding signed high-part multiply (3DNow!): 32768 is
;; added to each sign-extended 32-bit product before the upper 16 bits
;; are taken.
893 (define_expand "mmx_pmulhrwv4hi3"
894 [(set (match_operand:V4HI 0 "register_operand")
895 (truncate:V4HI
896 (lshiftrt:V4SI
897 (plus:V4SI
898 (mult:V4SI
899 (sign_extend:V4SI
900 (match_operand:V4HI 1 "nonimmediate_operand"))
901 (sign_extend:V4SI
902 (match_operand:V4HI 2 "nonimmediate_operand")))
903 (const_vector:V4SI [(const_int 32768) (const_int 32768)
904 (const_int 32768) (const_int 32768)]))
905 (const_int 16))))]
906 "TARGET_3DNOW"
907 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
908
;; Recognizer for PMULHRW (TARGET_3DNOW only).  The RTL is the rounded
;; high-part multiply: sign-extend, multiply, add 32768, shift right by
;; 16, truncate.  There is a single alternative using "y" registers:
;; operand 1 is tied to the destination and marked commutative, operand 2
;; may be a register or memory.
909 (define_insn "*mmx_pmulhrwv4hi3"
910 [(set (match_operand:V4HI 0 "register_operand" "=y")
911 (truncate:V4HI
912 (lshiftrt:V4SI
913 (plus:V4SI
914 (mult:V4SI
915 (sign_extend:V4SI
916 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
917 (sign_extend:V4SI
918 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
919 (const_vector:V4SI [(const_int 32768) (const_int 32768)
920 (const_int 32768) (const_int 32768)]))
921 (const_int 16))))]
922 "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)"
923 "pmulhrw\t{%2, %0|%0, %2}"
924 [(set_attr "type" "mmxmul")
925 (set_attr "prefix_extra" "1")
926 (set_attr "mode" "DI")])
927
;; Expander for the widening unsigned multiply of element 0 of two V2SI
;; operands: each selected SImode element is zero-extended to V1DI and
;; the two are multiplied.  Requires TARGET_SSE2 in addition to
;; TARGET_MMX or TARGET_MMX_WITH_SSE.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
928 (define_expand "sse2_umulv1siv1di3"
929 [(set (match_operand:V1DI 0 "register_operand")
930 (mult:V1DI
931 (zero_extend:V1DI
932 (vec_select:V1SI
933 (match_operand:V2SI 1 "register_mmxmem_operand")
934 (parallel [(const_int 0)])))
935 (zero_extend:V1DI
936 (vec_select:V1SI
937 (match_operand:V2SI 2 "register_mmxmem_operand")
938 (parallel [(const_int 0)])))))]
939 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
940 "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
941
;; Recognizer for PMULUDQ on 64-bit vectors: the zero-extended element 0
;; of operand 1 times the zero-extended element 0 of operand 2, giving a
;; V1DI result.  Alternatives are tagged native / x64_noavx / x64_avx via
;; mmx_isa; the first two emit the two-operand "pmuludq" and the last the
;; three-operand "vpmuludq".
942 (define_insn "*sse2_umulv1siv1di3"
943 [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
944 (mult:V1DI
945 (zero_extend:V1DI
946 (vec_select:V1SI
947 (match_operand:V2SI 1 "register_mmxmem_operand" "%0,0,Yv")
948 (parallel [(const_int 0)])))
949 (zero_extend:V1DI
950 (vec_select:V1SI
951 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
952 (parallel [(const_int 0)])))))]
953 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
954 && TARGET_SSE2
955 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
956 "@
957 pmuludq\t{%2, %0|%0, %2}
958 pmuludq\t{%2, %0|%0, %2}
959 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
960 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
961 (set_attr "type" "mmxmul,ssemul,ssemul")
962 (set_attr "mode" "DI,TI,TI")])
963
;; Expander, instantiated once per code in the smaxmin iterator (defined
;; outside this part of the file), for a V4HI max/min of operands 1 and 2.
;; Enabled when TARGET_MMX or TARGET_MMX_WITH_SSE holds together with
;; TARGET_SSE or TARGET_3DNOW_A.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
964 (define_expand "mmx_<code>v4hi3"
965 [(set (match_operand:V4HI 0 "register_operand")
966 (smaxmin:V4HI
967 (match_operand:V4HI 1 "register_mmxmem_operand")
968 (match_operand:V4HI 2 "register_mmxmem_operand")))]
969 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
970 && (TARGET_SSE || TARGET_3DNOW_A)"
971 "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
972
;; Variant of the V4HI smaxmin expander named without the "mmx_" prefix.
;; It accepts register operands only and is enabled solely under
;; TARGET_MMX_WITH_SSE.
;; NOTE(review): presumably this is the standard-named pattern used when
;; 64-bit vector operations are carried out in SSE registers -- confirm.
973 (define_expand "<code>v4hi3"
974 [(set (match_operand:V4HI 0 "register_operand")
975 (smaxmin:V4HI
976 (match_operand:V4HI 1 "register_operand")
977 (match_operand:V4HI 2 "register_operand")))]
978 "TARGET_MMX_WITH_SSE"
979 "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
980
;; Recognizer for the V4HI smaxmin operations.  The mnemonic is built as
;; "p<maxmin_int>w" (two-operand form) for the native and x64_noavx
;; alternatives and "vp<maxmin_int>w" (three-operand form) for x64_avx.
;; Operand 1 is commutative and the operands must satisfy
;; ix86_binary_operator_ok.
981 (define_insn "*mmx_<code>v4hi3"
982 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
983 (smaxmin:V4HI
984 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
985 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
986 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
987 && (TARGET_SSE || TARGET_3DNOW_A)
988 && ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
989 "@
990 p<maxmin_int>w\t{%2, %0|%0, %2}
991 p<maxmin_int>w\t{%2, %0|%0, %2}
992 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
993 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
994 (set_attr "type" "mmxadd,sseiadd,sseiadd")
995 (set_attr "mode" "DI,TI,TI")])
996
;; Expander, instantiated once per code in the umaxmin iterator (defined
;; outside this part of the file), for a V8QI max/min of operands 1 and 2.
;; Enabled when TARGET_MMX or TARGET_MMX_WITH_SSE holds together with
;; TARGET_SSE or TARGET_3DNOW_A.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
997 (define_expand "mmx_<code>v8qi3"
998 [(set (match_operand:V8QI 0 "register_operand")
999 (umaxmin:V8QI
1000 (match_operand:V8QI 1 "register_mmxmem_operand")
1001 (match_operand:V8QI 2 "register_mmxmem_operand")))]
1002 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1003 && (TARGET_SSE || TARGET_3DNOW_A)"
1004 "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
1005
;; Variant of the V8QI umaxmin expander named without the "mmx_" prefix.
;; It accepts register operands only and is enabled solely under
;; TARGET_MMX_WITH_SSE.
;; NOTE(review): presumably this is the standard-named pattern used when
;; 64-bit vector operations are carried out in SSE registers -- confirm.
1006 (define_expand "<code>v8qi3"
1007 [(set (match_operand:V8QI 0 "register_operand")
1008 (umaxmin:V8QI
1009 (match_operand:V8QI 1 "register_operand")
1010 (match_operand:V8QI 2 "register_operand")))]
1011 "TARGET_MMX_WITH_SSE"
1012 "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
1013
;; Recognizer for the V8QI umaxmin operations.  The mnemonic is built as
;; "p<maxmin_int>b" (two-operand form) for the native and x64_noavx
;; alternatives and "vp<maxmin_int>b" (three-operand form) for x64_avx.
;; Operand 1 is commutative and the operands must satisfy
;; ix86_binary_operator_ok.
1014 (define_insn "*mmx_<code>v8qi3"
1015 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
1016 (umaxmin:V8QI
1017 (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")
1018 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))]
1019 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1020 && (TARGET_SSE || TARGET_3DNOW_A)
1021 && ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
1022 "@
1023 p<maxmin_int>b\t{%2, %0|%0, %2}
1024 p<maxmin_int>b\t{%2, %0|%0, %2}
1025 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
1026 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1027 (set_attr "type" "mmxadd,sseiadd,sseiadd")
1028 (set_attr "mode" "DI,TI,TI")])
1029
;; Arithmetic right shift of every element of an MMXMODE24 vector by the
;; DImode count in operand 2.  Emits "psra<mmxvecsize>" (two-operand) for
;; the native and x64_noavx alternatives and "vpsra<mmxvecsize>"
;; (three-operand) for x64_avx.  The count is either a register or a
;; constant accepted by constraint "N"; length_immediate is "1" when
;; operand 2 is a const_int and "0" otherwise.
1030 (define_insn "mmx_ashr<mode>3"
1031 [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
1032 (ashiftrt:MMXMODE24
1033 (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
1034 (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
1035 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1036 "@
1037 psra<mmxvecsize>\t{%2, %0|%0, %2}
1038 psra<mmxvecsize>\t{%2, %0|%0, %2}
1039 vpsra<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
1040 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1041 (set_attr "type" "mmxshft,sseishft,sseishft")
1042 (set (attr "length_immediate")
1043 (if_then_else (match_operand 2 "const_int_operand")
1044 (const_string "1")
1045 (const_string "0")))
1046 (set_attr "mode" "DI,TI,TI")])
1047
;; Expander named without the "mmx_" prefix for the MMXMODE24 arithmetic
;; right shift.  It has no preparation statements, so it emits exactly
;; the RTL template below, and is enabled only under TARGET_MMX_WITH_SSE.
1048 (define_expand "ashr<mode>3"
1049 [(set (match_operand:MMXMODE24 0 "register_operand")
1050 (ashiftrt:MMXMODE24
1051 (match_operand:MMXMODE24 1 "register_operand")
1052 (match_operand:DI 2 "nonmemory_operand")))]
1053 "TARGET_MMX_WITH_SSE")
1054
;; Shift of every element of an MMXMODE248 vector by the DImode count in
;; operand 2, for each code in the any_lshift iterator (defined outside
;; this part of the file).  Emits "p<vshift><mmxvecsize>" (two-operand)
;; for the native and x64_noavx alternatives and "vp<vshift><mmxvecsize>"
;; (three-operand) for x64_avx.  length_immediate is "1" when operand 2
;; is a const_int and "0" otherwise.
1055 (define_insn "mmx_<shift_insn><mode>3"
1056 [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
1057 (any_lshift:MMXMODE248
1058 (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
1059 (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
1060 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1061 "@
1062 p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
1063 p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
1064 vp<vshift><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1066 (set_attr "type" "mmxshft,sseishft,sseishft")
1067 (set (attr "length_immediate")
1068 (if_then_else (match_operand 2 "const_int_operand")
1069 (const_string "1")
1070 (const_string "0")))
1071 (set_attr "mode" "DI,TI,TI")])
1072
;; Expander named without the "mmx_" prefix for the MMXMODE248 any_lshift
;; shifts.  It has no preparation statements, so it emits exactly the RTL
;; template below, and is enabled only under TARGET_MMX_WITH_SSE.
1073 (define_expand "<shift_insn><mode>3"
1074 [(set (match_operand:MMXMODE248 0 "register_operand")
1075 (any_lshift:MMXMODE248
1076 (match_operand:MMXMODE248 1 "register_operand")
1077 (match_operand:DI 2 "nonmemory_operand")))]
1078 "TARGET_MMX_WITH_SSE")
1079
1080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1081 ;;
1082 ;; Parallel integral comparisons
1083 ;;
1084 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1085
;; Expander for the element-wise equality comparison of two MMXMODEI
;; vectors (V8QI, V4HI or V2SI).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1086 (define_expand "mmx_eq<mode>3"
1087 [(set (match_operand:MMXMODEI 0 "register_operand")
1088 (eq:MMXMODEI
1089 (match_operand:MMXMODEI 1 "register_mmxmem_operand")
1090 (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
1091 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1092 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
1093
;; Recognizer for the element-wise equality comparison.  Emits
;; "pcmpeq<mmxvecsize>" (two-operand) for the native and x64_noavx
;; alternatives and "vpcmpeq<mmxvecsize>" (three-operand) for x64_avx.
;; Operand 1 is commutative and the operands must satisfy
;; ix86_binary_operator_ok.
1094 (define_insn "*mmx_eq<mode>3"
1095 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
1096 (eq:MMXMODEI
1097 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
1098 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
1099 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1100 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
1101 "@
1102 pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
1103 pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
1104 vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
1105 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1106 (set_attr "type" "mmxcmp,ssecmp,ssecmp")
1107 (set_attr "mode" "DI,TI,TI")])
1108
;; Element-wise greater-than comparison of two MMXMODEI vectors.  Unlike
;; the equality pattern, operand 1 carries no commutative marker and must
;; be a register.  Emits "pcmpgt<mmxvecsize>" (two-operand) for the native
;; and x64_noavx alternatives and "vpcmpgt<mmxvecsize>" (three-operand)
;; for x64_avx.
1109 (define_insn "mmx_gt<mode>3"
1110 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
1111 (gt:MMXMODEI
1112 (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
1113 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
1114 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1115 "@
1116 pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
1117 pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
1118 vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
1119 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1120 (set_attr "type" "mmxcmp,ssecmp,ssecmp")
1121 (set_attr "mode" "DI,TI,TI")])
1122
1123 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1124 ;;
1125 ;; Parallel integral logical operations
1126 ;;
1127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1128
;; Bitwise AND of the complement of operand 1 with operand 2, i.e.
;; (~op1) & op2, on an MMXMODEI vector.  Operand 1 must be a register and
;; is not commutative.  Emits "pandn" (two-operand) for the native and
;; x64_noavx alternatives and "vpandn" (three-operand) for x64_avx.
1129 (define_insn "mmx_andnot<mode>3"
1130 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
1131 (and:MMXMODEI
1132 (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
1133 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
1134 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1135 "@
1136 pandn\t{%2, %0|%0, %2}
1137 pandn\t{%2, %0|%0, %2}
1138 vpandn\t{%2, %1, %0|%0, %1, %2}"
1139 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1140 (set_attr "type" "mmxadd,sselog,sselog")
1141 (set_attr "mode" "DI,TI,TI")])
1142
;; Expander for the bitwise operations in the any_logic iterator (defined
;; outside this part of the file) on MMXMODEI vectors.  Operands may be
;; registers or memory and are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1143 (define_expand "mmx_<code><mode>3"
1144 [(set (match_operand:MMXMODEI 0 "register_operand")
1145 (any_logic:MMXMODEI
1146 (match_operand:MMXMODEI 1 "register_mmxmem_operand")
1147 (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
1148 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1149 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1150
;; Variant of the any_logic expander named without the "mmx_" prefix.  It
;; accepts register operands only and is enabled solely under
;; TARGET_MMX_WITH_SSE.
;; NOTE(review): presumably this is the standard-named pattern used when
;; 64-bit vector operations are carried out in SSE registers -- confirm.
1151 (define_expand "<code><mode>3"
1152 [(set (match_operand:MMXMODEI 0 "register_operand")
1153 (any_logic:MMXMODEI
1154 (match_operand:MMXMODEI 1 "register_operand")
1155 (match_operand:MMXMODEI 2 "register_operand")))]
1156 "TARGET_MMX_WITH_SSE"
1157 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1158
;; Recognizer for the any_logic bitwise operations on MMXMODEI vectors.
;; Emits "p<logic>" (two-operand) for the native and x64_noavx
;; alternatives and "vp<logic>" (three-operand) for x64_avx.  Operand 1
;; is commutative and the operands must satisfy ix86_binary_operator_ok.
1159 (define_insn "*mmx_<code><mode>3"
1160 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
1161 (any_logic:MMXMODEI
1162 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
1163 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
1164 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1165 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1166 "@
1167 p<logic>\t{%2, %0|%0, %2}
1168 p<logic>\t{%2, %0|%0, %2}
1169 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
1170 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1171 (set_attr "type" "mmxadd,sselog,sselog")
1172 (set_attr "mode" "DI,TI,TI")])
1173
1174 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1175 ;;
1176 ;; Parallel integral element swizzling
1177 ;;
1178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1179
1180 ;; Code iterator over the two saturating truncations: signed
;; (ss_truncate) and unsigned (us_truncate).
1181 (define_code_iterator any_s_truncate [ss_truncate us_truncate])
1182 ;; Mnemonic suffix letter for each saturating truncation: "s" for
;; ss_truncate and "u" for us_truncate.
1183 (define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
1184
;; Pack two V4HI vectors into one V8QI with saturation: each input is
;; truncated to V4QI with the any_s_truncate code and the halves are
;; concatenated, operand 1 first.  The native alternative emits
;; "pack<s_trunsuffix>swb" directly.  The other two alternatives output
;; "#" and are split after reload, under TARGET_MMX_WITH_SSE, by
;; ix86_split_mmx_pack, which receives the truncation code.
1185 (define_insn_and_split "mmx_pack<s_trunsuffix>swb"
1186 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
1187 (vec_concat:V8QI
1188 (any_s_truncate:V4QI
1189 (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
1190 (any_s_truncate:V4QI
1191 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
1192 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1193 "@
1194 pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
1195 #
1196 #"
1197 "TARGET_MMX_WITH_SSE && reload_completed"
1198 [(const_int 0)]
1199 "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>); DONE;"
1200 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1201 (set_attr "type" "mmxshft,sselog,sselog")
1202 (set_attr "mode" "DI,TI,TI")])
1203
;; Pack two V2SI vectors into one V4HI with signed saturation: each input
;; is ss_truncate'd to V2HI and the halves are concatenated, operand 1
;; first.  The native alternative emits "packssdw" directly.  The other
;; two alternatives output "#" and are split after reload, under
;; TARGET_MMX_WITH_SSE, by ix86_split_mmx_pack with SS_TRUNCATE.
1204 (define_insn_and_split "mmx_packssdw"
1205 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
1206 (vec_concat:V4HI
1207 (ss_truncate:V2HI
1208 (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
1209 (ss_truncate:V2HI
1210 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
1211 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1212 "@
1213 packssdw\t{%2, %0|%0, %2}
1214 #
1215 #"
1216 "TARGET_MMX_WITH_SSE && reload_completed"
1217 [(const_int 0)]
1218 "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
1219 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1220 (set_attr "type" "mmxshft,sselog,sselog")
1221 (set_attr "mode" "DI,TI,TI")])
1222
;; Interleave the high bytes of two V8QI vectors.  Viewing operands 1 and
;; 2 as a concatenated V16QI, the result takes elements
;; 4,12,5,13,6,14,7,15, i.e. bytes 4..7 of each input, alternating.  The
;; native alternative emits "punpckhbw"; the other two output "#" and are
;; split after reload, under TARGET_MMX_WITH_SSE, by
;; ix86_split_mmx_punpck with second argument true.
1223 (define_insn_and_split "mmx_punpckhbw"
1224 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
1225 (vec_select:V8QI
1226 (vec_concat:V16QI
1227 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
1228 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
1229 (parallel [(const_int 4) (const_int 12)
1230 (const_int 5) (const_int 13)
1231 (const_int 6) (const_int 14)
1232 (const_int 7) (const_int 15)])))]
1233 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1234 "@
1235 punpckhbw\t{%2, %0|%0, %2}
1236 #
1237 #"
1238 "TARGET_MMX_WITH_SSE && reload_completed"
1239 [(const_int 0)]
1240 "ix86_split_mmx_punpck (operands, true); DONE;"
1241 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1242 (set_attr "type" "mmxcvt,sselog,sselog")
1243 (set_attr "mode" "DI,TI,TI")])
1244
;; Interleave the low bytes of two V8QI vectors.  Viewing operands 1 and
;; 2 as a concatenated V16QI, the result takes elements
;; 0,8,1,9,2,10,3,11, i.e. bytes 0..3 of each input, alternating.  The
;; native alternative emits "punpcklbw" (operand 2 is printed with the
;; "k" modifier in the Intel-syntax half of the template); the other two
;; output "#" and are split after reload, under TARGET_MMX_WITH_SSE, by
;; ix86_split_mmx_punpck with second argument false.
1245 (define_insn_and_split "mmx_punpcklbw"
1246 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
1247 (vec_select:V8QI
1248 (vec_concat:V16QI
1249 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
1250 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
1251 (parallel [(const_int 0) (const_int 8)
1252 (const_int 1) (const_int 9)
1253 (const_int 2) (const_int 10)
1254 (const_int 3) (const_int 11)])))]
1255 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1256 "@
1257 punpcklbw\t{%2, %0|%0, %k2}
1258 #
1259 #"
1260 "TARGET_MMX_WITH_SSE && reload_completed"
1261 [(const_int 0)]
1262 "ix86_split_mmx_punpck (operands, false); DONE;"
1263 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1264 (set_attr "type" "mmxcvt,sselog,sselog")
1265 (set_attr "mode" "DI,TI,TI")])
1266
;; Interleave the high words of two V4HI vectors.  Viewing operands 1 and
;; 2 as a concatenated V8HI, the result takes elements 2,6,3,7.  The
;; native alternative emits "punpckhwd"; the other two output "#" and are
;; split after reload, under TARGET_MMX_WITH_SSE, by
;; ix86_split_mmx_punpck with second argument true.
1267 (define_insn_and_split "mmx_punpckhwd"
1268 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
1269 (vec_select:V4HI
1270 (vec_concat:V8HI
1271 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
1272 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
1273 (parallel [(const_int 2) (const_int 6)
1274 (const_int 3) (const_int 7)])))]
1275 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1276 "@
1277 punpckhwd\t{%2, %0|%0, %2}
1278 #
1279 #"
1280 "TARGET_MMX_WITH_SSE && reload_completed"
1281 [(const_int 0)]
1282 "ix86_split_mmx_punpck (operands, true); DONE;"
1283 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1284 (set_attr "type" "mmxcvt,sselog,sselog")
1285 (set_attr "mode" "DI,TI,TI")])
1286
;; Interleave the low words of two V4HI vectors.  Viewing operands 1 and
;; 2 as a concatenated V8HI, the result takes elements 0,4,1,5.  The
;; native alternative emits "punpcklwd" (operand 2 is printed with the
;; "k" modifier in the Intel-syntax half of the template); the other two
;; output "#" and are split after reload, under TARGET_MMX_WITH_SSE, by
;; ix86_split_mmx_punpck with second argument false.
1287 (define_insn_and_split "mmx_punpcklwd"
1288 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
1289 (vec_select:V4HI
1290 (vec_concat:V8HI
1291 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
1292 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
1293 (parallel [(const_int 0) (const_int 4)
1294 (const_int 1) (const_int 5)])))]
1295 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1296 "@
1297 punpcklwd\t{%2, %0|%0, %k2}
1298 #
1299 #"
1300 "TARGET_MMX_WITH_SSE && reload_completed"
1301 [(const_int 0)]
1302 "ix86_split_mmx_punpck (operands, false); DONE;"
1303 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1304 (set_attr "type" "mmxcvt,sselog,sselog")
1305 (set_attr "mode" "DI,TI,TI")])
1306
;; Combine the high doublewords of two V2SI vectors.  Viewing operands 1
;; and 2 as a concatenated V4SI, the result takes elements 1 and 3.  The
;; native alternative emits "punpckhdq"; the other two output "#" and are
;; split after reload, under TARGET_MMX_WITH_SSE, by
;; ix86_split_mmx_punpck with second argument true.
1307 (define_insn_and_split "mmx_punpckhdq"
1308 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
1309 (vec_select:V2SI
1310 (vec_concat:V4SI
1311 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
1312 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
1313 (parallel [(const_int 1)
1314 (const_int 3)])))]
1315 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1316 "@
1317 punpckhdq\t{%2, %0|%0, %2}
1318 #
1319 #"
1320 "TARGET_MMX_WITH_SSE && reload_completed"
1321 [(const_int 0)]
1322 "ix86_split_mmx_punpck (operands, true); DONE;"
1323 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1324 (set_attr "type" "mmxcvt,sselog,sselog")
1325 (set_attr "mode" "DI,TI,TI")])
1326
;; Combine the low doublewords of two V2SI vectors.  Viewing operands 1
;; and 2 as a concatenated V4SI, the result takes elements 0 and 2.  The
;; native alternative emits "punpckldq" (operand 2 is printed with the
;; "k" modifier in the Intel-syntax half of the template); the other two
;; output "#" and are split after reload, under TARGET_MMX_WITH_SSE, by
;; ix86_split_mmx_punpck with second argument false.
1327 (define_insn_and_split "mmx_punpckldq"
1328 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
1329 (vec_select:V2SI
1330 (vec_concat:V4SI
1331 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
1332 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
1333 (parallel [(const_int 0)
1334 (const_int 2)])))]
1335 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1336 "@
1337 punpckldq\t{%2, %0|%0, %k2}
1338 #
1339 #"
1340 "TARGET_MMX_WITH_SSE && reload_completed"
1341 [(const_int 0)]
1342 "ix86_split_mmx_punpck (operands, false); DONE;"
1343 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1344 (set_attr "type" "mmxcvt,sselog,sselog")
1345 (set_attr "mode" "DI,TI,TI")])
1346
;; Expander for inserting a 16-bit value into one element of a V4HI
;; vector.  On entry operand 2 is an SImode value and operand 3 an
;; element index in the range 0..3.  The preparation code rewrites
;; operand 2 as its HImode low part and operand 3 as the one-hot mask
;; (1 << index) that vec_merge uses to choose the replaced element.
1347 (define_expand "mmx_pinsrw"
1348 [(set (match_operand:V4HI 0 "register_operand")
1349 (vec_merge:V4HI
1350 (vec_duplicate:V4HI
1351 (match_operand:SI 2 "nonimmediate_operand"))
1352 (match_operand:V4HI 1 "register_operand")
1353 (match_operand:SI 3 "const_0_to_3_operand")))]
1354 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1355 && (TARGET_SSE || TARGET_3DNOW_A)"
1356 {
1357 operands[2] = gen_lowpart (HImode, operands[2]);
1358 operands[3] = GEN_INT (1 << INTVAL (operands[3]));
1359 })
1360
;; Recognizer for PINSRW on a V4HI vector.  Operand 3 is the vec_merge
;; mask; the insn condition accepts it only when it is a power of two
;; whose log2 is below the number of V4HI elements.  The output code
;; converts the mask back to an element index with exact_log2, then
;; picks "vpinsrw" (three-operand) when TARGET_MMX_WITH_SSE and
;; TARGET_AVX both hold and "pinsrw" otherwise.  A memory source is
;; printed as %2, a register source with the "k" modifier (%k2).
1361 (define_insn "*mmx_pinsrw"
1362 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
1363 (vec_merge:V4HI
1364 (vec_duplicate:V4HI
1365 (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
1366 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
1367 (match_operand:SI 3 "const_int_operand")))]
1368 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1369 && (TARGET_SSE || TARGET_3DNOW_A)
1370 && ((unsigned) exact_log2 (INTVAL (operands[3]))
1371 < GET_MODE_NUNITS (V4HImode))"
1372 {
1373 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
1374 if (TARGET_MMX_WITH_SSE && TARGET_AVX)
1375 {
1376 if (MEM_P (operands[2]))
1377 return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
1378 else
1379 return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
1380 }
1381 else
1382 {
1383 if (MEM_P (operands[2]))
1384 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
1385 else
1386 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
1387 }
1388 }
1389 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1390 (set_attr "type" "mmxcvt,sselog,sselog")
1391 (set_attr "length_immediate" "1")
1392 (set_attr "mode" "DI,TI,TI")])
1393
;; Extract the HImode element of V4HI operand 1 selected by the constant
;; index in operand 2 (0..3) and zero-extend it into an SImode general
;; register.  Alternative 0 (mmx_isa native) reads a "y" register and
;; emits "pextrw"; alternative 1 (mmx_isa x64) reads a "Yv" register and
;; emits "%vpextrw".
;; NOTE(review): the "%v" output modifier presumably adds the "v" prefix
;; when AVX is enabled -- confirm against the operand printer.
1394 (define_insn "mmx_pextrw"
1395 [(set (match_operand:SI 0 "register_operand" "=r,r")
1396 (zero_extend:SI
1397 (vec_select:HI
1398 (match_operand:V4HI 1 "register_operand" "y,Yv")
1399 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))]
1400 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1401 && (TARGET_SSE || TARGET_3DNOW_A)"
1402 "@
1403 pextrw\t{%2, %1, %0|%0, %1, %2}
1404 %vpextrw\t{%2, %1, %0|%0, %1, %2}"
1405 [(set_attr "mmx_isa" "native,x64")
1406 (set_attr "type" "mmxcvt,sselog1")
1407 (set_attr "length_immediate" "1")
1408 (set_attr "mode" "DI,TI")])
1409
;; Expander for a V4HI shuffle controlled by the constant mask in
;; operand 2.  The mask is split into four 2-bit selectors, bits 1:0,
;; 3:2, 5:4 and 7:6, which are passed in that order to gen_mmx_pshufw_1
;; together with the destination and source; nothing else is emitted.
1410 (define_expand "mmx_pshufw"
1411 [(match_operand:V4HI 0 "register_operand")
1412 (match_operand:V4HI 1 "register_mmxmem_operand")
1413 (match_operand:SI 2 "const_int_operand")]
1414 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1415 && (TARGET_SSE || TARGET_3DNOW_A)"
1416 {
1417 int mask = INTVAL (operands[2]);
1418 emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
1419 GEN_INT ((mask >> 0) & 3),
1420 GEN_INT ((mask >> 2) & 3),
1421 GEN_INT ((mask >> 4) & 3),
1422 GEN_INT ((mask >> 6) & 3)));
1423 DONE;
1424 })
1425
;; V4HI shuffle with one constant selector (0..3) per result element in
;; operands 2..5.  The output code packs the four selectors back into a
;; single immediate (operand 2 in bits 1:0, operand 3 in bits 3:2,
;; operand 4 in bits 5:4, operand 5 in bits 7:6), stores it in
;; operands[2], and emits "pshufw" for alternative 0 or "%vpshuflw" for
;; alternative 1.
1426 (define_insn "mmx_pshufw_1"
1427 [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
1428 (vec_select:V4HI
1429 (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yv")
1430 (parallel [(match_operand 2 "const_0_to_3_operand")
1431 (match_operand 3 "const_0_to_3_operand")
1432 (match_operand 4 "const_0_to_3_operand")
1433 (match_operand 5 "const_0_to_3_operand")])))]
1434 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1435 && (TARGET_SSE || TARGET_3DNOW_A)"
1436 {
1437 int mask = 0;
1438 mask |= INTVAL (operands[2]) << 0;
1439 mask |= INTVAL (operands[3]) << 2;
1440 mask |= INTVAL (operands[4]) << 4;
1441 mask |= INTVAL (operands[5]) << 6;
1442 operands[2] = GEN_INT (mask);
1443
1444 switch (which_alternative)
1445 {
1446 case 0:
1447 return "pshufw\t{%2, %1, %0|%0, %1, %2}";
1448 case 1:
1449 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
1450 default:
1451 gcc_unreachable ();
1452 }
1453 }
1454 [(set_attr "mmx_isa" "native,x64")
1455 (set_attr "type" "mmxcvt,sselog")
1456 (set_attr "length_immediate" "1")
1457 (set_attr "mode" "DI,TI")])
1458
;; Swap the two SImode elements of a V2SI vector (result element 0 is
;; source element 1 and vice versa).  Enabled only under TARGET_3DNOW_A;
;; emits "pswapd" with the destination in a "y" register and the source
;; in a "y" register or memory.
1459 (define_insn "mmx_pswapdv2si2"
1460 [(set (match_operand:V2SI 0 "register_operand" "=y")
1461 (vec_select:V2SI
1462 (match_operand:V2SI 1 "nonimmediate_operand" "ym")
1463 (parallel [(const_int 1) (const_int 0)])))]
1464 "TARGET_3DNOW_A"
1465 "pswapd\t{%1, %0|%0, %1}"
1466 [(set_attr "type" "mmxcvt")
1467 (set_attr "prefix_extra" "1")
1468 (set_attr "mode" "DI")])
1469
;; Broadcast the low 16 bits of SImode operand 1 into all four elements
;; of a V4HI vector.  Alternative 0 emits "pshufw" with immediate 0 on
;; the destination register itself (operand 1 is tied to operand 0).
;; The other two alternatives output "#" and are split after reload
;; under TARGET_MMX_WITH_SSE.  The split views operand 0 as V8HI and then
;;   - with TARGET_AVX2, emits a V8HI vec_duplicate of the HImode low
;;     part of operand 1;
;;   - otherwise, views operand 1 as V8HI and emits a vec_select with
;;     indices 0,0,0,0,4,5,6,7, replicating element 0 across the low
;;     four elements and leaving the high four in place.
1470 (define_insn_and_split "*vec_dupv4hi"
1471 [(set (match_operand:V4HI 0 "register_operand" "=y,Yv,Yw")
1472 (vec_duplicate:V4HI
1473 (truncate:HI
1474 (match_operand:SI 1 "register_operand" "0,Yv,r"))))]
1475 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1476 && (TARGET_SSE || TARGET_3DNOW_A)"
1477 "@
1478 pshufw\t{$0, %0, %0|%0, %0, 0}
1479 #
1480 #"
1481 "TARGET_MMX_WITH_SSE && reload_completed"
1482 [(const_int 0)]
1483 {
1484 rtx op;
1485 operands[0] = lowpart_subreg (V8HImode, operands[0],
1486 GET_MODE (operands[0]));
1487 if (TARGET_AVX2)
1488 {
1489 operands[1] = lowpart_subreg (HImode, operands[1],
1490 GET_MODE (operands[1]));
1491 op = gen_rtx_VEC_DUPLICATE (V8HImode, operands[1]);
1492 }
1493 else
1494 {
1495 operands[1] = lowpart_subreg (V8HImode, operands[1],
1496 GET_MODE (operands[1]));
1497 rtx mask = gen_rtx_PARALLEL (VOIDmode,
1498 gen_rtvec (8,
1499 GEN_INT (0),
1500 GEN_INT (0),
1501 GEN_INT (0),
1502 GEN_INT (0),
1503 GEN_INT (4),
1504 GEN_INT (5),
1505 GEN_INT (6),
1506 GEN_INT (7)));
1507
1508 op = gen_rtx_VEC_SELECT (V8HImode, operands[1], mask);
1509 }
1510 rtx insn = gen_rtx_SET (operands[0], op);
1511 emit_insn (insn);
1512 DONE;
1513 }
1514 [(set_attr "mmx_isa" "native,x64,x64_avx")
1515 (set_attr "type" "mmxcvt,sselog1,ssemov")
1516 (set_attr "length_immediate" "1,1,0")
1517 (set_attr "mode" "DI,TI,TI")])
1518
;; Broadcast SImode operand 1 into both elements of a V2SI vector.
;; Alternative 0 emits "punpckldq %0, %0" on the destination register
;; (operand 1 is tied to operand 0).  The remaining alternatives output
;; "#" and are split after reload under TARGET_MMX_WITH_SSE into a V4SI
;; vec_duplicate of operand 1, with operand 0 viewed as V4SI.
1519 (define_insn_and_split "*vec_dupv2si"
1520 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
1521 (vec_duplicate:V2SI
1522 (match_operand:SI 1 "register_operand" "0,0,Yv,r")))]
1523 "TARGET_MMX || TARGET_MMX_WITH_SSE"
1524 "@
1525 punpckldq\t%0, %0
1526 #
1527 #
1528 #"
1529 "TARGET_MMX_WITH_SSE && reload_completed"
1530 [(set (match_dup 0)
1531 (vec_duplicate:V4SI (match_dup 1)))]
1532 "operands[0] = lowpart_subreg (V4SImode, operands[0],
1533 GET_MODE (operands[0]));"
1534 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx,x64_avx")
1535 (set_attr "type" "mmxcvt,ssemov,ssemov,ssemov")
1536 (set_attr "mode" "DI,TI,TI,TI")])
1537
;; Build a V2SI vector in a "y" register from two SImode values, element
;; 0 from operand 1 and element 1 from operand 2.  Only active when
;; TARGET_MMX is set and TARGET_SSE is not.  Alternative 0 has operand 1
;; tied to the destination and emits "punpckldq" with operand 2;
;; alternative 1 emits "movd" of operand 1 alone, with operand 2 matching
;; constraint "C".
;; NOTE(review): given the nonimm_or_0_operand predicate, "C" here is
;; presumably the constant zero -- confirm in constraints.md.
1538 (define_insn "*mmx_concatv2si"
1539 [(set (match_operand:V2SI 0 "register_operand" "=y,y")
1540 (vec_concat:V2SI
1541 (match_operand:SI 1 "nonimmediate_operand" " 0,rm")
1542 (match_operand:SI 2 "nonimm_or_0_operand" "ym,C")))]
1543 "TARGET_MMX && !TARGET_SSE"
1544 "@
1545 punpckldq\t{%2, %0|%0, %2}
1546 movd\t{%1, %0|%0, %1}"
1547 [(set_attr "type" "mmxcvt,mmxmov")
1548 (set_attr "mode" "DI")])
1549
;; Expander for storing SImode operand 1 into the element of V2SI
;; operand 0 whose constant index is operand 2.  All work is delegated to
;; ix86_expand_vector_set, called with first argument false.
;; NOTE(review): the meaning of that first argument is not visible in
;; this file -- check the helper's definition.
1550 (define_expand "vec_setv2si"
1551 [(match_operand:V2SI 0 "register_operand")
1552 (match_operand:SI 1 "register_operand")
1553 (match_operand 2 "const_int_operand")]
1554 "TARGET_MMX"
1555 {
1556 ix86_expand_vector_set (false, operands[0], operands[1],
1557 INTVAL (operands[2]));
1558 DONE;
1559 })
1560
;; Extract element 0 of a V2SI vector into an SImode destination.  The
;; pattern never emits code itself ("#"); after reload it is split into
;; a plain SImode move from the low part of operand 1.  The insn
;; condition rejects the memory-to-memory case.
1561 ;; Avoid combining registers from different units in a single alternative,
1562 ;; see comment above inline_secondary_memory_needed function in i386.c
1563 (define_insn_and_split "*vec_extractv2si_0"
1564 [(set (match_operand:SI 0 "nonimmediate_operand" "=x,m,y, m,r")
1565 (vec_select:SI
1566 (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
1567 (parallel [(const_int 0)])))]
1568 "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1569 "#"
1570 "&& reload_completed"
1571 [(set (match_dup 0) (match_dup 1))]
1572 "operands[1] = gen_lowpart (SImode, operands[1]);")
1573
;; Extract element 1 of a V2SI vector into an SImode destination.
;; Register sources are handled directly: "punpckhdq %0, %0" for a "y"
;; register tied to the destination, "%vpshufd" with immediate 0xe5
;; (isa sse2) or "shufps" with immediate 0xe5 (isa noavx) for "x"
;; registers.  The three alternatives with an offsettable memory source
;; (constraint "o") output "#".
;; NOTE(review): those "#" alternatives are presumably handled by the
;; anonymous define_split for a memory operand that follows this
;; pattern in the file -- confirm.
1574 ;; Avoid combining registers from different units in a single alternative,
1575 ;; see comment above inline_secondary_memory_needed function in i386.c
1576 (define_insn "*vec_extractv2si_1"
1577 [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,y,x,r")
1578 (vec_select:SI
1579 (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o")
1580 (parallel [(const_int 1)])))]
1581 "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1582 "@
1583 punpckhdq\t%0, %0
1584 %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
1585 shufps\t{$0xe5, %1, %0|%0, %1, 0xe5}
1586 #
1587 #
1588 #"
1589 [(set_attr "isa" "*,sse2,noavx,*,*,*")
1590 (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov")
1591 (set (attr "length_immediate")
1592 (if_then_else (eq_attr "alternative" "1,2")
1593 (const_string "1")
1594 (const_string "*")))
1595 (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig")
1596 (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")])
1597
;; After reload, turn the extraction of element 1 from a V2SI memory
;; operand into a plain SImode load: the address of operand 1 is
;; re-based to SImode at byte offset 4.
1598 (define_split
1599 [(set (match_operand:SI 0 "register_operand")
1600 (vec_select:SI
1601 (match_operand:V2SI 1 "memory_operand")
1602 (parallel [(const_int 1)])))]
1603 "TARGET_MMX && reload_completed"
1604 [(set (match_dup 0) (match_dup 1))]
1605 "operands[1] = adjust_address (operands[1], SImode, 4);")
1606
;; Zero-extend to DImode the SImode element (index 0 or 1, operand 2) of
;; a V2SI vector that lives in offsettable memory.  The pattern outputs
;; "#" and is split after reload into a zero_extend of an SImode memory
;; reference at byte offset index * 4.
;; NOTE(review): the insn condition is TARGET_64BIT alone, while the
;; other V2SI element-extraction patterns in this file also require
;; TARGET_MMX, and alternative 0 targets a "y" register with no isa
;; restriction -- confirm that omitting TARGET_MMX here is intended.
1607 (define_insn_and_split "*vec_extractv2si_zext_mem"
1608 [(set (match_operand:DI 0 "register_operand" "=y,x,r")
1609 (zero_extend:DI
1610 (vec_select:SI
1611 (match_operand:V2SI 1 "memory_operand" "o,o,o")
1612 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))))]
1613 "TARGET_64BIT"
1614 "#"
1615 "&& reload_completed"
1616 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
1617 {
1618 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
1619 }
1620 [(set_attr "isa" "*,sse2,*")])
1621
;; Expander for reading the element of V2SI operand 1 whose constant
;; index is operand 2 into SImode operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with first argument false.
;; NOTE(review): the meaning of that first argument is not visible in
;; this file -- check the helper's definition.
1622 (define_expand "vec_extractv2sisi"
1623 [(match_operand:SI 0 "register_operand")
1624 (match_operand:V2SI 1 "register_operand")
1625 (match_operand 2 "const_int_operand")]
1626 "TARGET_MMX"
1627 {
1628 ix86_expand_vector_extract (false, operands[0], operands[1],
1629 INTVAL (operands[2]));
1630 DONE;
1631 })
1632
;; Expander for initializing V2SI operand 0 from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init, called with first argument false.  Note the
;; condition is TARGET_SSE, unlike the TARGET_MMX-gated vec_set and
;; vec_extract expanders for the same mode.
1633 (define_expand "vec_initv2sisi"
1634 [(match_operand:V2SI 0 "register_operand")
1635 (match_operand 1)]
1636 "TARGET_SSE"
1637 {
1638 ix86_expand_vector_init (false, operands[0], operands[1]);
1639 DONE;
1640 })
1641
;; Expander for storing HImode operand 1 into the element of V4HI
;; operand 0 whose constant index is operand 2.  All work is delegated to
;; ix86_expand_vector_set, called with first argument false.
;; NOTE(review): the meaning of that first argument is not visible in
;; this file -- check the helper's definition.
1642 (define_expand "vec_setv4hi"
1643 [(match_operand:V4HI 0 "register_operand")
1644 (match_operand:HI 1 "register_operand")
1645 (match_operand 2 "const_int_operand")]
1646 "TARGET_MMX"
1647 {
1648 ix86_expand_vector_set (false, operands[0], operands[1],
1649 INTVAL (operands[2]));
1650 DONE;
1651 })
1652
;; Expander for reading the element of V4HI operand 1 whose constant
;; index is operand 2 into HImode operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with first argument false.
;; NOTE(review): the meaning of that first argument is not visible in
;; this file -- check the helper's definition.
1653 (define_expand "vec_extractv4hihi"
1654 [(match_operand:HI 0 "register_operand")
1655 (match_operand:V4HI 1 "register_operand")
1656 (match_operand 2 "const_int_operand")]
1657 "TARGET_MMX"
1658 {
1659 ix86_expand_vector_extract (false, operands[0], operands[1],
1660 INTVAL (operands[2]));
1661 DONE;
1662 })
1663
;; Expander for initializing V4HI operand 0 from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init, called with first argument false.  Note the
;; condition is TARGET_SSE, unlike the TARGET_MMX-gated vec_set and
;; vec_extract expanders for the same mode.
1664 (define_expand "vec_initv4hihi"
1665 [(match_operand:V4HI 0 "register_operand")
1666 (match_operand 1)]
1667 "TARGET_SSE"
1668 {
1669 ix86_expand_vector_init (false, operands[0], operands[1]);
1670 DONE;
1671 })
1672
;; Expander for storing QImode operand 1 into the element of V8QI
;; operand 0 whose constant index is operand 2.  All work is delegated to
;; ix86_expand_vector_set, called with first argument false.
;; NOTE(review): the meaning of that first argument is not visible in
;; this file -- check the helper's definition.
1673 (define_expand "vec_setv8qi"
1674 [(match_operand:V8QI 0 "register_operand")
1675 (match_operand:QI 1 "register_operand")
1676 (match_operand 2 "const_int_operand")]
1677 "TARGET_MMX"
1678 {
1679 ix86_expand_vector_set (false, operands[0], operands[1],
1680 INTVAL (operands[2]));
1681 DONE;
1682 })
1683
;; Expander for reading the element of V8QI operand 1 whose constant
;; index is operand 2 into QImode operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with first argument false.
;; NOTE(review): the meaning of that first argument is not visible in
;; this file -- check the helper's definition.
1684 (define_expand "vec_extractv8qiqi"
1685 [(match_operand:QI 0 "register_operand")
1686 (match_operand:V8QI 1 "register_operand")
1687 (match_operand 2 "const_int_operand")]
1688 "TARGET_MMX"
1689 {
1690 ix86_expand_vector_extract (false, operands[0], operands[1],
1691 INTVAL (operands[2]));
1692 DONE;
1693 })
1694
;; Expander for initializing V8QI operand 0 from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init, called with first argument false.  Note the
;; condition is TARGET_SSE, unlike the TARGET_MMX-gated vec_set and
;; vec_extract expanders for the same mode.
1695 (define_expand "vec_initv8qiqi"
1696 [(match_operand:V8QI 0 "register_operand")
1697 (match_operand 1)]
1698 "TARGET_SSE"
1699 {
1700 ix86_expand_vector_init (false, operands[0], operands[1]);
1701 DONE;
1702 })
1703
1704 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1705 ;;
1706 ;; Miscellaneous
1707 ;;
1708 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1709
;; Expander for the rounding unsigned byte average of two V8QI vectors:
;; both inputs are zero-extended to V8HI and added, 1 is added to every
;; sum, and the result is shifted right logically by 1 and truncated
;; back to V8QI.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
;; The feature test is TARGET_SSE || TARGET_3DNOW (not TARGET_3DNOW_A):
;; the matching insn falls back to "pavgusb" when neither TARGET_SSE nor
;; TARGET_3DNOW_A is available, so base 3DNow! must be admitted for that
;; fallback to be reachable.  This condition must stay identical to the
;; one on "*mmx_uavgv8qi3", otherwise the expander could emit RTL that
;; no insn recognizes.
1710 (define_expand "mmx_uavgv8qi3"
1711 [(set (match_operand:V8QI 0 "register_operand")
1712 (truncate:V8QI
1713 (lshiftrt:V8HI
1714 (plus:V8HI
1715 (plus:V8HI
1716 (zero_extend:V8HI
1717 (match_operand:V8QI 1 "register_mmxmem_operand"))
1718 (zero_extend:V8HI
1719 (match_operand:V8QI 2 "register_mmxmem_operand")))
1720 (const_vector:V8HI [(const_int 1) (const_int 1)
1721 (const_int 1) (const_int 1)
1722 (const_int 1) (const_int 1)
1723 (const_int 1) (const_int 1)]))
1724 (const_int 1))))]
1725 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1726 && (TARGET_SSE || TARGET_3DNOW)"
1727 "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
1728
;; Recognizer for the rounding unsigned byte average.  The output code
;; picks the mnemonic:
;;   - "vpavgb" (three-operand) when TARGET_MMX_WITH_SSE and TARGET_AVX
;;     both hold;
;;   - "pavgb" when TARGET_SSE or TARGET_3DNOW_A is available;
;;   - "pavgusb" otherwise, i.e. on a base-3DNow!-only target.
;; The prefix_extra attribute is "1" exactly in the "pavgusb" case.
;; Operand 1 is commutative and the operands must satisfy
;; ix86_binary_operator_ok.
1729 (define_insn "*mmx_uavgv8qi3"
1730 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
1731 (truncate:V8QI
1732 (lshiftrt:V8HI
1733 (plus:V8HI
1734 (plus:V8HI
1735 (zero_extend:V8HI
1736 (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv"))
1737 (zero_extend:V8HI
1738 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))
1739 (const_vector:V8HI [(const_int 1) (const_int 1)
1740 (const_int 1) (const_int 1)
1741 (const_int 1) (const_int 1)
1742 (const_int 1) (const_int 1)]))
1743 (const_int 1))))]
1744 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1745 && (TARGET_SSE || TARGET_3DNOW)
1746 && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
1747 {
1748 /* These two instructions have the same operation, but their encoding
1749 is different. Prefer the one that is de facto standard. */
1750 if (TARGET_MMX_WITH_SSE && TARGET_AVX)
1751 return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
1752 else if (TARGET_SSE || TARGET_3DNOW_A)
1753 return "pavgb\t{%2, %0|%0, %2}";
1754 else
1755 return "pavgusb\t{%2, %0|%0, %2}";
1756 }
1757 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1758 (set_attr "type" "mmxshft,sseiadd,sseiadd")
1759 (set (attr "prefix_extra")
1760 (if_then_else
1761 (not (ior (match_test "TARGET_SSE")
1762 (match_test "TARGET_3DNOW_A")))
1763 (const_string "1")
1764 (const_string "*")))
1765 (set_attr "mode" "DI,TI,TI")])
1766
;; Expander for the rounding unsigned word average of two V4HI vectors:
;; both inputs are zero-extended to V4SI and added, 1 is added to every
;; sum, and the result is shifted right logically by 1 and truncated
;; back to V4HI.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
1767 (define_expand "mmx_uavgv4hi3"
1768 [(set (match_operand:V4HI 0 "register_operand")
1769 (truncate:V4HI
1770 (lshiftrt:V4SI
1771 (plus:V4SI
1772 (plus:V4SI
1773 (zero_extend:V4SI
1774 (match_operand:V4HI 1 "register_mmxmem_operand"))
1775 (zero_extend:V4SI
1776 (match_operand:V4HI 2 "register_mmxmem_operand")))
1777 (const_vector:V4SI [(const_int 1) (const_int 1)
1778 (const_int 1) (const_int 1)]))
1779 (const_int 1))))]
1780 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1781 && (TARGET_SSE || TARGET_3DNOW_A)"
1782 "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
1783
;; Recognizer for the rounding unsigned word average.  Emits "pavgw"
;; (two-operand) for the native and x64_noavx alternatives and "vpavgw"
;; (three-operand) for x64_avx.  Operand 1 is commutative and the
;; operands must satisfy ix86_binary_operator_ok.
1784 (define_insn "*mmx_uavgv4hi3"
1785 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
1786 (truncate:V4HI
1787 (lshiftrt:V4SI
1788 (plus:V4SI
1789 (plus:V4SI
1790 (zero_extend:V4SI
1791 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
1792 (zero_extend:V4SI
1793 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
1794 (const_vector:V4SI [(const_int 1) (const_int 1)
1795 (const_int 1) (const_int 1)]))
1796 (const_int 1))))]
1797 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1798 && (TARGET_SSE || TARGET_3DNOW_A)
1799 && ix86_binary_operator_ok (PLUS, V4HImode, operands)"
1800 "@
1801 pavgw\t{%2, %0|%0, %2}
1802 pavgw\t{%2, %0|%0, %2}
1803 vpavgw\t{%2, %1, %0|%0, %1, %2}"
1804 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1805 (set_attr "type" "mmxshft,sseiadd,sseiadd")
1806 (set_attr "mode" "DI,TI,TI")])
1807
;; PSADBW on two V8QI vectors, producing a V1DI result.  The operation
;; is kept opaque as an UNSPEC_PSADBW unspec.  Operand 1 must be a
;; register and carries no commutative marker.  Emits "psadbw"
;; (two-operand) for the native and x64_noavx alternatives and "vpsadbw"
;; (three-operand) for x64_avx.
1808 (define_insn "mmx_psadbw"
1809 [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
1810 (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
1811 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
1812 UNSPEC_PSADBW))]
1813 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1814 && (TARGET_SSE || TARGET_3DNOW_A)"
1815 "@
1816 psadbw\t{%2, %0|%0, %2}
1817 psadbw\t{%2, %0|%0, %2}
1818 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
1819 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
1820 (set_attr "type" "mmxshft,sseiadd,sseiadd")
1821 (set_attr "mode" "DI,TI,TI")])
1822
;; Byte mask extraction from a V8QI vector into an SImode general register,
;; modelled as UNSPEC_MOVMSK.
;; Alternative 0 ("y" source, mmx_isa "native") emits pmovmskb directly.
;; Alternative 1 ("x" source, mmx_isa "x64") has the output template "#",
;; i.e. it is never printed as-is and must be split.
;; The split fires when TARGET_MMX_WITH_SSE && reload_completed and
;; produces two sets:
;;   1. operand 0 = UNSPEC_MOVMSK of operand 1 viewed as V16QImode
;;      (lowpart subreg of the same register);
;;   2. operand 0 = zero_extend:SI of the QImode low part of operand 0.
;; NOTE(review): step 2 presumably discards the mask bits contributed by
;; the upper eight bytes of the 16-byte register, which are not part of
;; the V8QI value -- confirm against the V16QI movmsk pattern.
;; NOTE(review): the split condition does not start with "&&", so it is
;; not combined with the insn condition above; it relies on
;; TARGET_MMX_WITH_SSE alone.
1823 (define_insn_and_split "mmx_pmovmskb"
1824 [(set (match_operand:SI 0 "register_operand" "=r,r")
1825 (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
1826 UNSPEC_MOVMSK))]
1827 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
1828 && (TARGET_SSE || TARGET_3DNOW_A)"
1829 "@
1830 pmovmskb\t{%1, %0|%0, %1}
1831 #"
1832 "TARGET_MMX_WITH_SSE && reload_completed"
1833 [(set (match_dup 0)
1834 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
1835 (set (match_dup 0)
1836 (zero_extend:SI (match_dup 2)))]
1837 {
1838 /* Generate SSE pmovmskb and zero-extend from QImode to SImode. */
1839 operands[1] = lowpart_subreg (V16QImode, operands[1],
1840 GET_MODE (operands[1]));
1841 operands[2] = lowpart_subreg (QImode, operands[0],
1842 GET_MODE (operands[0]));
1843 }
1844 [(set_attr "mmx_isa" "native,x64")
1845 (set_attr "type" "mmxcvt,ssemov")
1846 (set_attr "mode" "DI,TI")])
1847
;; Named expander for the masked 8-byte store.  Operand 0 is the V8QI
;; memory destination; operands 1 and 2 are V8QI registers.  The old
;; contents of the destination appear as a third unspec input
;; (match_dup 0), so the store is modelled as depending on what was
;; already in memory.
;; There are no preparation statements; the expander only emits the
;; pattern, which is then matched by an insn such as "*mmx_maskmovq".
;; Note the condition tests only TARGET_SSE || TARGET_3DNOW_A, with no
;; TARGET_MMX / TARGET_MMX_WITH_SSE term.
1848 (define_expand "mmx_maskmovq"
1849 [(set (match_operand:V8QI 0 "memory_operand")
1850 (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
1851 (match_operand:V8QI 2 "register_operand")
1852 (match_dup 0)]
1853 UNSPEC_MASKMOV))]
1854 "TARGET_SSE || TARGET_3DNOW_A")
1855
;; Matching insn for the masked 8-byte store.  The destination address is
;; operand 0, a pointer-mode (P iterator) register restricted by the "D"
;; constraint; it is never printed in the output template, which names
;; only operands 1 and 2 (both "y" registers).
;; NOTE(review): this matches maskmovq using an implicit destination
;; address register -- confirm against the instruction reference.
;; The operand order in the two assembler dialects is flagged as
;; unverified by the "@@@" note below.
1856 (define_insn "*mmx_maskmovq"
1857 [(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
1858 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
1859 (match_operand:V8QI 2 "register_operand" "y")
1860 (mem:V8QI (match_dup 0))]
1861 UNSPEC_MASKMOV))]
1862 "TARGET_SSE || TARGET_3DNOW_A"
1863 ;; @@@ check ordering of operands in intel/nonintel syntax
1864 "maskmovq\t{%2, %1|%1, %2}"
1865 [(set_attr "type" "mmxcvt")
1866 (set_attr "znver1_decode" "vector")
1867 (set_attr "mode" "DI")])
1868
;; Int iterator over the two state-clearing unspec_volatile codes.
;; Each entry carries its own enabling condition: UNSPECV_EMMS requires
;; TARGET_MMX, UNSPECV_FEMMS requires TARGET_3DNOW.
1869 (define_int_iterator EMMS
1870 [(UNSPECV_EMMS "TARGET_MMX")
1871 (UNSPECV_FEMMS "TARGET_3DNOW")])
1872
;; Maps each EMMS iterator value to a lowercase string: "emms" for
;; UNSPECV_EMMS and "femms" for UNSPECV_FEMMS.  The "mmx_<emms>" insn
;; uses it for both its pattern name and its assembler mnemonic.
1873 (define_int_attr emms
1874 [(UNSPECV_EMMS "emms")
1875 (UNSPECV_FEMMS "femms")])
1876
;; Instantiated once per EMMS iterator value, yielding "mmx_emms" and
;; "mmx_femms".  The body is an unspec_volatile plus explicit clobbers of
;; all eight x87 stack registers (ST0..ST7, XFmode) and all eight MMX
;; registers (MM0..MM7, DImode), so no value in either register set is
;; treated as live across this insn.
;; The insn condition string is empty; the per-variant target tests come
;; from the EMMS iterator entries.
;; Attributes record that the instruction has no modrm byte and does not
;; access memory.
1877 (define_insn "mmx_<emms>"
1878 [(unspec_volatile [(const_int 0)] EMMS)
1879 (clobber (reg:XF ST0_REG))
1880 (clobber (reg:XF ST1_REG))
1881 (clobber (reg:XF ST2_REG))
1882 (clobber (reg:XF ST3_REG))
1883 (clobber (reg:XF ST4_REG))
1884 (clobber (reg:XF ST5_REG))
1885 (clobber (reg:XF ST6_REG))
1886 (clobber (reg:XF ST7_REG))
1887 (clobber (reg:DI MM0_REG))
1888 (clobber (reg:DI MM1_REG))
1889 (clobber (reg:DI MM2_REG))
1890 (clobber (reg:DI MM3_REG))
1891 (clobber (reg:DI MM4_REG))
1892 (clobber (reg:DI MM5_REG))
1893 (clobber (reg:DI MM6_REG))
1894 (clobber (reg:DI MM7_REG))]
1895 ""
1896 "<emms>"
1897 [(set_attr "type" "mmx")
1898 (set_attr "modrm" "0")
1899 (set_attr "memory" "none")])