MMX/SSE patterns for i386
author Bernd Schmidt <bernds@redhat.co.uk>
Fri, 8 Sep 2000 17:36:33 +0000 (17:36 +0000)
committer Bernd Schmidt <crux@gcc.gnu.org>
Fri, 8 Sep 2000 17:36:33 +0000 (17:36 +0000)
From-SVN: r36270

gcc/ChangeLog
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md

gcc/ChangeLog
index 508e8a28e4a06fbddfedd0c5474bc0ce22bcba6a..c92af028ae090a594f3b31dca6f4ac3f86ed507c 100644 (file)
@@ -1,3 +1,39 @@
+2000-09-08  Bernd Schmidt  <bernds@redhat.co.uk>
+
+       * i386-protos.h (sse_comparison_operator, mmx_reg_operand): Declare
+       new functions.
+       * i386.c (sse_comparison_operator, mmx_reg_operand): New functions.
+       * i386.md (attr "type"): Add sse and mmx types.
+       (attr "memory"): Handle them without a crash.
+       (movsi_1, movdi_2): Allow MMX regs.
+       (movdi splits): Don't split moves involving MMX regs.
+       (setcc_4): Remove '*' from pattern name so we get a gen_setcc4.
+       (movv4sf_internal, movv4si_internal, movv8qi_internal,
+       movv4hi_internal, movv2si_internal, movv8qi, movv4hi, movv2si,
+       movv4sf, movv4si, pushv4sf, pushv4si, pushv8qi, pushv4hi, pushv2si,
+       sse_movaps, sse_movups, sse_movmskps, mmx_pmovmskb, mmx_maskmovq,
+       sse_movntv4sf, sse_movntdi, sse_movhlps, sse_movlhps, sse_movhps,
+       sse_movlps, sse_loadss, sse_movss, sse_storess, sse_shufps,
+       addv4sf3, vmaddv4sf3, subv4sf3, vmsubv4sf3, mulv4sf3, vmmulv4sf3,
+       divv4sf3, vmdivv4sf3, rcpv4sf2, vmrcpv4sf2, rsqrtv4sf2, vmrsqrtv4sf2,
+       sqrtv4sf2, vmsqrtv4sf2, sse_andti3, sse_nandti3, sse_iorti3,
+       sse_xorti3, maskcmpv4sf3, maskncmpv4sf3, vmmaskcmpv4sf3,
+       vmmaskncmpv4sf3, sse_comi, sse_ucomi, sse_unpckhps, sse_unpcklps,
+       smaxv4sf3, vmsmaxv4sf3, sminv4sf3, vmsminv4sf3, cvtpi2ps, cvtps2pi,
+       cvttps2pi, cvtsi2ss, cvtss2si, cvttss2si, addv8qi3, addv4hi3,
+       addv2si3, ssaddv8qi3, ssaddv4hi3, usaddv8qi3, usaddv4hi3, subv8qi3,
+       subv4hi3, subv2si3, sssubv8qi3, sssubv4hi3, ussubv8qi3, ussubv4hi3,
+       mulv4hi3, smulv4hi3_highpart, umulv4hi3_highpart, mmx_pmaddwd,
+       mmx_iordi3, mmx_xordi3, mmx_anddi3, mmx_nanddi3, mmx_uavgv8qi3,
+       mmx_uavgv4hi3, mmx_psadbw, mmx_pinsrw, mmx_pextrw, mmx_pshufw,
+       eqv8qi3, eqv4hi3, eqv2si3, gtv8qi3, gtv4hi3, gtv2si3, umaxv8qi3,
+       smaxv4hi3, uminv8qi3, sminv4hi3, ashrv4hi3, ashrv2si3, lshrv4hi3,
+       lshrv2si3, mmx_lshrdi3, ashlv4hi3, ashlv2si3, mmx_ashldi3,
+       mmx_packsswb, mmx_packssdw, mmx_packuswb, mmx_punpckhbw,
+       mmx_punpckhwd, mmx_punpckhdq, mmx_punpcklbw, mmx_punpcklwd,
+       mmx_punpckldq, emms, sfence, ldmxcsr, prefetch, stmxcsr, sse_clrti,
+       mmx_clrdi): New patterns.
+
 2000-09-08  Richard Earnshaw  <rearnsha@arm.com>
 
        * arm.c: Don't include tm.h directly.
gcc/config/i386/i386-protos.h
index fb86b1b6f208ffe0110182b30d0f9f55beefd0b4..7ffb299ee88f34da824a4442ddc068b4b794b3ba 100644 (file)
@@ -51,11 +51,13 @@ extern int const1_operand PARAMS ((rtx, enum machine_mode));
 extern int const248_operand PARAMS ((rtx, enum machine_mode));
 extern int incdec_operand PARAMS ((rtx, enum machine_mode));
 extern int reg_no_sp_operand PARAMS ((rtx, enum machine_mode));
+extern int mmx_reg_operand PARAMS ((rtx, enum machine_mode));
 extern int general_no_elim_operand PARAMS ((rtx, enum machine_mode));
 extern int nonmemory_no_elim_operand PARAMS ((rtx, enum machine_mode));
 extern int q_regs_operand PARAMS ((rtx, enum machine_mode));
 extern int non_q_regs_operand PARAMS ((rtx, enum machine_mode));
 extern int no_comparison_operator PARAMS ((rtx, enum machine_mode));
+extern int sse_comparison_operator PARAMS ((rtx, enum machine_mode));
 extern int fcmov_comparison_operator PARAMS ((rtx, enum machine_mode));
 extern int uno_comparison_operator PARAMS ((rtx, enum machine_mode));
 extern int cmp_fp_expander_operand PARAMS ((rtx, enum machine_mode));
gcc/config/i386/i386.c
index a1746a15695388951b69815daa4f5d45ad6bf09d..7485d1904bd718086ec7b5c93d3a13c72bc76218 100644 (file)
@@ -1174,6 +1174,14 @@ reg_no_sp_operand (op, mode)
   return register_operand (op, mode);
 }
 
+int
+mmx_reg_operand (op, mode)
+     register rtx op;
+     enum machine_mode mode;
+{
+  return MMX_REG_P (op);
+}
+
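The new predicate leans entirely on the MMX_REG_P macro from i386.h, which is not part of this hunk. A minimal sketch of the shape such a macro takes (the FIRST_MMX_REG/LAST_MMX_REG names are assumptions for illustration, not taken from the patch):

    /* Illustrative only: an rtx is an MMX register if it is a REG whose
       hard register number falls in the MMX register range.  The real
       macro lives in i386.h.  */
    #define MMX_REGNO_P(N)  ((N) >= FIRST_MMX_REG && (N) <= LAST_MMX_REG)
    #define MMX_REG_P(XOP)  (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))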
 /* Return false if this is any eliminable register.  Otherwise
    general_operand.  */
 
@@ -1264,6 +1272,17 @@ no_comparison_operator (op, mode)
     }
 }
 
+/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
+   insns.  */
+int
+sse_comparison_operator (op, mode)
+     rtx op;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  enum rtx_code code = GET_CODE (op);
+  return code == EQ || code == LT || code == LE || code == UNORDERED;
+}
+
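A minimal sketch (illustrative, not from the patch) of what the predicate accepts: exactly the four codes that CMPPS/CMPSS encode directly, so any other comparison has to be rewritten before these insns can be used.

    /* Build two comparisons over V4SF pseudos and test them.  */
    rtx a  = gen_reg_rtx (V4SFmode);
    rtx b  = gen_reg_rtx (V4SFmode);
    rtx lt = gen_rtx_LT (VOIDmode, a, b);
    rtx gt = gen_rtx_GT (VOIDmode, a, b);

    /* LT is accepted; GT is rejected and would have to be handled by
       swapping the operands so it becomes an LT.  */
    int ok_lt = sse_comparison_operator (lt, VOIDmode);  /* 1 */
    int ok_gt = sse_comparison_operator (gt, VOIDmode);  /* 0 */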
 /* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
 
 int
gcc/config/i386/i386.h
index ff396df357f78829c4684a9d8d1b8b38d6258632..ab0b5369012f0bfd8a63d7513eb49a8a6ce53631 100644 (file)
@@ -2584,6 +2584,7 @@ do { long l;                                              \
   {"const1_operand", {CONST_INT}},                                     \
   {"const248_operand", {CONST_INT}},                                   \
   {"incdec_operand", {CONST_INT}},                                     \
+  {"mmx_reg_operand", {REG}},                                          \
   {"reg_no_sp_operand", {SUBREG, REG}},                                        \
   {"general_no_elim_operand", {CONST_INT, CONST_DOUBLE, CONST,         \
                        SYMBOL_REF, LABEL_REF, SUBREG, REG, MEM}},      \
@@ -2592,6 +2593,7 @@ do { long l;                                              \
   {"non_q_regs_operand", {SUBREG, REG}},                               \
   {"no_comparison_operator", {EQ, NE, LT, GE, LTU, GTU, LEU, GEU}},    \
   {"fcmov_comparison_operator", {EQ, NE, LTU, GTU, LEU, GEU}},         \
+  {"sse_comparison_operator", {EQ, LT, LE, UNORDERED }},               \
   {"uno_comparison_operator", {EQ, NE, LE, LT, GE, GT, LEU, LTU, GEU,  \
                               GTU, UNORDERED, ORDERED}},               \
   {"cmp_fp_expander_operand", {CONST_DOUBLE, SUBREG, REG, MEM}},       \
gcc/config/i386/i386.md
index 72fd72088c0db545f7e433aaf25f0a3f6b6ad959..51386bc011489f1d59ef9014dc0e898567e8f3eb 100644 (file)
 ;; 9  This is an `fnstsw' operation.
 ;; 10 This is a `sahf' operation.
 ;; 11 This is a `fstcw' operation
-;;
+
+;; For SSE/MMX support:
+;; 30 This is `fix', guaranteed to be truncating.
+;; 31 This is an `emms' operation.
+;; 32 This is a `maskmov' operation.
+;; 33 This is a `movmsk' operation.
+;; 34 This is a `non-temporal' move.
+;; 35 This is a `prefetch' operation.
+;; 36 This is used to distinguish COMISS from UCOMISS.
+;; 37 This is a `ldmxcsr' operation.
+;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
+;; 39 This is a forced `movups' instruction (rather than whatever movti does)
+;; 40 This is a `stmxcsr' operation.
+;; 41 This is a `shuffle' operation.
+;; 42 This is a `rcp' operation.
+;; 43 This is a `rsqrt' operation.
+;; 44 This is a `sfence' operation.
+;; 45 This is a noop to prevent excessive combiner cleverness.
+
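Each number above turns up as the integer tag of an unspec rtx in the patterns below. A small sketch (illustrative, not from the patch) of building such an rtx by hand:

    /* The last argument of gen_rtx_UNSPEC is the tag from the list above;
       41 marks a shuffle, so this rtx is what the sse_shufps pattern
       further down matches (0x1b is an arbitrary shuffle immediate).  */
    rtx op1  = gen_reg_rtx (V4SFmode);
    rtx op2  = gen_reg_rtx (V4SFmode);
    rtx shuf = gen_rtx_UNSPEC (V4SFmode,
                               gen_rtvec (3, op1, op2, GEN_INT (0x1b)),
                               41);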
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
 ;; from i386.c.
 
 ;; A basic instruction type.  Refinements due to arguments to be
 ;; provided in other attributes.
 (define_attr "type"
-  "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld"
+  "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx"
   (const_string "other"))
 
 ;; Main data type used by the insn
           (const_string "store")
         (match_operand 1 "memory_operand" "")
           (const_string "load")
-        (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp")
+        (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx")
              (match_operand 2 "memory_operand" ""))
           (const_string "load")
         (and (eq_attr "type" "icmov")
    (set_attr "length_immediate" "1")])
 
 (define_insn "*movsi_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m")
-       (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m,!*y,!r")
+       (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,r,*y"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
   "*
 {
   switch (get_attr_type (insn))
     {
+    case TYPE_MMX:
+      return \"movd\\t{%1, %0|%0, %1}\";
+
     case TYPE_LEA:
       return \"lea{l}\\t{%1, %0|%0, %1}\";
+
     default:
       if (flag_pic && SYMBOLIC_CONST (operands[1]))
        abort();
     }
 }"
   [(set (attr "type")
-     (cond [(and (ne (symbol_ref "flag_pic") (const_int 0))
+     (cond [(ior (match_operand:SI 0 "mmx_reg_operand" "")
+                (match_operand:SI 1 "mmx_reg_operand" ""))
+             (const_string "mmx")
+           (and (ne (symbol_ref "flag_pic") (const_int 0))
                 (match_operand:SI 1 "symbolic_operand" ""))
              (const_string "lea")
           ]
           (const_string "imov")))
-   (set_attr "modrm" "0,*,0,*")
+   (set_attr "modrm" "0,*,0,*,*,*")
    (set_attr "mode" "SI")])
 
 (define_insn "*swapsi"
   "#")
 
 (define_insn "*movdi_2"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
-       (match_operand:DI 1 "general_operand" "riFo,riF"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y")
+       (match_operand:DI 1 "general_operand" "riFo,riF,*y,m"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
-  "#")
+  "@
+   #
+   #
+   movq\\t{%1, %0|%0, %1}
+   movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "*,*,mmx,mmx")])
 
 (define_split
   [(set (match_operand:DI 0 "push_operand" "")
         (match_operand:DI 1 "general_operand" ""))]
-  "reload_completed"
+  "reload_completed && ! MMX_REG_P (operands[1])"
   [(const_int 0)]
   "if (!ix86_split_long_move (operands)) abort (); DONE;")
 
 (define_split
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
         (match_operand:DI 1 "general_operand" ""))]
-  "reload_completed"
+  "reload_completed && ! MMX_REG_P (operands[0]) && ! MMX_REG_P (operands[1])"
   [(set (match_dup 2) (match_dup 5))
    (set (match_dup 3) (match_dup 6))]
   "if (ix86_split_long_move (operands)) DONE;")
   [(set_attr "type" "setcc")
    (set_attr "mode" "QI")])
 
-(define_insn "*setcc_4"
+(define_insn "setcc_4"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
        (match_operator:QI 1 "uno_comparison_operator"
          [(reg:CC 17) (const_int 0)]))]
                             CODE_LABEL_NUMBER (operands[2]));
   RET;
 }")
+
+       ;; Pentium III SIMD instructions.
+
+;; Moves for SSE/MMX regs.
+
+(define_insn "movv4sf_internal"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+       (match_operand:V4SF 1 "general_operand" "xm,x"))]
+  "TARGET_SSE"
+  ;; @@@ let's try to use movaps here.
+  "movaps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "movv4si_internal"
+  [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
+       (match_operand:V4SI 1 "general_operand" "xm,x"))]
+  "TARGET_SSE"
+  ;; @@@ let's try to use movaps here.
+  "movaps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "movv8qi_internal"
+  [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
+       (match_operand:V8QI 1 "general_operand" "ym,y"))]
+  "TARGET_MMX"
+  "movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "movv4hi_internal"
+  [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
+       (match_operand:V4HI 1 "general_operand" "ym,y"))]
+  "TARGET_MMX"
+  "movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "movv2si_internal"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
+       (match_operand:V2SI 1 "general_operand" "ym,y"))]
+  "TARGET_MMX"
+  "movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_expand "movti"
+  [(set (match_operand:TI 0 "general_operand" "")
+       (match_operand:TI 1 "general_operand" ""))]
+  "TARGET_SSE"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], TImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (TImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], TImode)
+      && !register_operand (operands[1], TImode)
+      && operands[1] != CONST0_RTX (TImode))
+    {
+      rtx temp = force_reg (TImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv4sf"
+  [(set (match_operand:V4SF 0 "general_operand" "")
+       (match_operand:V4SF 1 "general_operand" ""))]
+  "TARGET_SSE"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V4SFmode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V4SFmode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V4SFmode)
+      && !register_operand (operands[1], V4SFmode)
+      && operands[1] != CONST0_RTX (V4SFmode))
+    {
+      rtx temp = force_reg (V4SFmode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv4si"
+  [(set (match_operand:V4SI 0 "general_operand" "")
+       (match_operand:V4SI 1 "general_operand" ""))]
+  "TARGET_MMX"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V4SImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V4SImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V4SImode)
+      && !register_operand (operands[1], V4SImode)
+      && operands[1] != CONST0_RTX (V4SImode))
+    {
+      rtx temp = force_reg (V4SImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv2si"
+  [(set (match_operand:V2SI 0 "general_operand" "")
+       (match_operand:V2SI 1 "general_operand" ""))]
+  "TARGET_MMX"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V2SImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V2SImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V2SImode)
+      && !register_operand (operands[1], V2SImode)
+      && operands[1] != CONST0_RTX (V2SImode))
+    {
+      rtx temp = force_reg (V2SImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv4hi"
+  [(set (match_operand:V4HI 0 "general_operand" "")
+       (match_operand:V4HI 1 "general_operand" ""))]
+  "TARGET_MMX"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V4HImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V4HImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V4HImode)
+      && !register_operand (operands[1], V4HImode)
+      && operands[1] != CONST0_RTX (V4HImode))
+    {
+      rtx temp = force_reg (V4HImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_expand "movv8qi"
+  [(set (match_operand:V8QI 0 "general_operand" "")
+       (match_operand:V8QI 1 "general_operand" ""))]
+  "TARGET_MMX"
+  "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V8QImode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V8QImode, addr);
+    }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V8QImode)
+      && !register_operand (operands[1], V8QImode)
+      && operands[1] != CONST0_RTX (V8QImode))
+    {
+      rtx temp = force_reg (V8QImode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
+(define_insn_and_split "*pushti"
+  [(set (match_operand:TI 0 "push_operand" "=<")
+       (match_operand:TI 1 "nonmemory_operand" "x"))]
+  "TARGET_SSE"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+   (set (mem:TI (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "sse")])
+
+(define_insn_and_split "*pushv4sf"
+  [(set (match_operand:V4SF 0 "push_operand" "=<")
+       (match_operand:V4SF 1 "nonmemory_operand" "x"))]
+  "TARGET_SSE"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+   (set (mem:V4SF (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "sse")])
+
+(define_insn_and_split "*pushv4si"
+  [(set (match_operand:V4SI 0 "push_operand" "=<")
+       (match_operand:V4SI 1 "nonmemory_operand" "x"))]
+  "TARGET_SSE"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+   (set (mem:V4SI (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "sse")])
+
+(define_insn_and_split "*pushv2si"
+  [(set (match_operand:V2SI 0 "push_operand" "=<")
+       (match_operand:V2SI 1 "nonmemory_operand" "y"))]
+  "TARGET_MMX"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+   (set (mem:V2SI (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "mmx")])
+
+(define_insn_and_split "*pushv4hi"
+  [(set (match_operand:V4HI 0 "push_operand" "=<")
+       (match_operand:V4HI 1 "nonmemory_operand" "y"))]
+  "TARGET_MMX"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+   (set (mem:V4HI (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "mmx")])
+
+(define_insn_and_split "*pushv8qi"
+  [(set (match_operand:V8QI 0 "push_operand" "=<")
+       (match_operand:V8QI 1 "nonmemory_operand" "y"))]
+  "TARGET_MMX"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+   (set (mem:V8QI (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "mmx")])
+
+(define_insn "movti_internal"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
+       (match_operand:TI 1 "general_operand" "xm,x"))]
+  "TARGET_SSE"
+  "@
+   movaps\\t{%1, %0|%0, %1}
+   movaps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; These two patterns are useful for specifying exactly whether to use
+;; movaps or movups
+(define_insn "sse_movaps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+       (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
+  "TARGET_SSE"
+  "@
+   movaps\\t{%1, %0|%0, %1}
+   movaps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movups"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+       (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
+  "TARGET_SSE"
+  "@
+   movups\\t{%1, %0|%0, %1}
+   movups\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
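A sketch of why the explicit forms matter (illustrative, not from the patch): calling the named pattern directly forces a movups for a possibly unaligned access, whereas the generic move patterns above always emit movaps:

    /* Unaligned 128-bit load through the named sse_movups pattern,
       bypassing the movv4sf expander.  */
    rtx dst = gen_reg_rtx (V4SFmode);
    rtx mem = gen_rtx_MEM (V4SFmode, gen_reg_rtx (Pmode));

    emit_insn (gen_sse_movups (dst, mem));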
+
+;; SSE Strange Moves.
+
+(define_insn "sse_movmskps"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))]
+  "TARGET_SSE"
+  "movmskps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))]
+  "TARGET_SSE"
+  "pmovmskb\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mmx_maskmovq"
+  [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
+       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+                     (match_operand:V8QI 2 "register_operand" "y")] 32))]
+  "TARGET_SSE"
+  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "maskmovq\\t{%2, %1|%1, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movntv4sf"
+  [(set (match_operand:V4SF 0 "memory_operand" "=m")
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))]
+  "TARGET_SSE"
+  "movntps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movntdi"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+       (unspec:DI [(match_operand:DI 1 "register_operand" "x")] 34))]
+  "TARGET_SSE"
+  "movntq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movhlps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (match_operand:V4SF 1 "register_operand" "0")
+        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+                         (parallel [(const_int 2)
+                                    (const_int 3)
+                                    (const_int 0)
+                                    (const_int 1)]))
+        (const_int 3)))]
+  "TARGET_SSE"
+  "movhlps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movlhps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (match_operand:V4SF 1 "register_operand" "0")
+        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+                         (parallel [(const_int 2)
+                                    (const_int 3)
+                                    (const_int 0)
+                                    (const_int 1)]))
+        (const_int 12)))]
+  "TARGET_SSE"
+  "movlhps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+       (vec_merge:V4SF
+        (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
+        (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
+        (const_int 12)))]
+  "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+  "movhps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+       (vec_merge:V4SF
+        (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
+        (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
+        (const_int 3)))]
+  "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+  "movlps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_loadss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (match_operand:V4SF 1 "memory_operand" "m")
+        (vec_duplicate:V4SF (float:SF (const_int 0)))
+        (const_int 1)))]
+  "TARGET_SSE"
+  "movss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_movss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (match_operand:V4SF 1 "register_operand" "0")
+        (match_operand:V4SF 2 "register_operand" "x")
+        (const_int 1)))]
+  "TARGET_SSE"
+  "movss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_storess"
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+       (vec_select:SF
+        (match_operand:V4SF 1 "register_operand" "x")
+        (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "movss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_shufps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+                     (match_operand:SI 3 "immediate_operand" "i")] 41))]
+  "TARGET_SSE"
+  ;; @@@ check operand order for intel/nonintel syntax
+  "shufps\\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE arithmetic
+
+(define_insn "addv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "addps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmaddv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                                  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "addss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "subv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "subps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                                  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "subss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "mulps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                                  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "mulss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "divv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "divps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmdivv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                                 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "divss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE square root/reciprocal
+
+(define_insn "rcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
+  "TARGET_SSE"
+  "rcpps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmrcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
+                        (match_operand:V4SF 2 "register_operand" "0")
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "rcpss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "rsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
+  "TARGET_SSE"
+  "rsqrtps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmrsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
+                        (match_operand:V4SF 2 "register_operand" "0")
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "rsqrtss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
+  "TARGET_SSE"
+  "sqrtps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
+                        (match_operand:V4SF 2 "register_operand" "0")
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "sqrtss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE logical operations.
+
+;; These are not called andti3 etc. because we really really don't want
+;; the compiler to widen DImode ands to TImode ands and then try to move
+;; into DImode subregs of SSE registers, and them together, and move out
+;; of DImode subregs again!
+
+(define_insn "sse_andti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (and:TI (match_operand:TI 1 "register_operand" "0")
+               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "andps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_nandti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "andnps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_iorti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (ior:TI (match_operand:TI 1 "register_operand" "0")
+               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "iorps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_xorti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (xor:TI (match_operand:TI 1 "register_operand" "0")
+               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "xorps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Use xor, but don't show input operands so they aren't live before
+;; this insn.
+(define_insn "sse_clrti"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (unspec:TI [(const_int 0)] 45))]
+  "TARGET_SSE"
+  "xorps\\t{%0, %0|%0, %0}"
+  [(set_attr "type" "sse")])
+
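A sketch of the intended use (illustrative, not from the patch): zeroing an SSE register without making its previous contents look live, which an xor written with visible input operands would do:

    /* Clear a TImode pseudo; the unspec hides the inputs, so data-flow
       passes never see a use of the register's old value.  */
    rtx x = gen_reg_rtx (TImode);

    emit_insn (gen_sse_clrti (x));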
+
+;; SSE mask-generating compares
+
+(define_insn "maskcmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (match_operator:V4SI 3 "sse_comparison_operator"
+                            [(match_operand:V4SF 1 "register_operand" "0")
+                             (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpeqps\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpltps\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpleps\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpunordps\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+(define_insn "maskncmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (not:V4SI
+        (match_operator:V4SI 3 "sse_comparison_operator"
+                             [(match_operand:V4SF 1 "register_operand" "0")
+                              (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpneqps\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpnltps\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpnleps\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpordps\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmaskcmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (vec_merge:V4SI
+        (match_operator:V4SI 3 "sse_comparison_operator"
+                             [(match_operand:V4SF 1 "register_operand" "0")
+                              (match_operand:V4SF 2 "nonimmediate_operand" "x")])
+        (match_dup 1)
+        (const_int 1)))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpeqss\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpltss\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpless\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpunordss\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmmaskncmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+       (vec_merge:V4SI
+        (not:V4SI
+         (match_operator:V4SI 3 "sse_comparison_operator"
+                              [(match_operand:V4SF 1 "register_operand" "0")
+                               (match_operand:V4SF 2 "nonimmediate_operand" "x")]))
+        (subreg:V4SI (match_dup 1) 0)
+        (const_int 1)))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpneqss\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpnltss\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpnless\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpordss\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_comi"
+  [(set (reg:CCFP 17)
+        (match_operator:CCFP 2 "sse_comparison_operator"
+                       [(vec_select:SF
+                         (match_operand:V4SF 0 "register_operand" "x")
+                         (parallel [(const_int 0)]))
+                        (vec_select:SF
+                         (match_operand:V4SF 1 "register_operand" "x")
+                         (parallel [(const_int 0)]))]))]
+  "TARGET_SSE"
+  "comiss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_ucomi"
+  [(set (reg:CCFPU 17)
+       (match_operator:CCFPU 2 "sse_comparison_operator"
+                       [(vec_select:SF
+                         (match_operand:V4SF 0 "register_operand" "x")
+                         (parallel [(const_int 0)]))
+                        (vec_select:SF
+                         (match_operand:V4SF 1 "register_operand" "x")
+                         (parallel [(const_int 0)]))]))]
+  "TARGET_SSE"
+  "ucomiss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE unpack
+
+(define_insn "sse_unpckhps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                         (parallel [(const_int 2)
+                                    (const_int 0)
+                                    (const_int 3)
+                                    (const_int 1)]))
+        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "x")
+                         (parallel [(const_int 0)
+                                    (const_int 2)
+                                    (const_int 1)
+                                    (const_int 3)]))
+        (const_int 5)))]
+  "TARGET_SSE"
+  "unpckhps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_unpcklps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF
+        (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                         (parallel [(const_int 0)
+                                    (const_int 2)
+                                    (const_int 1)
+                                    (const_int 3)]))
+        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "x")
+                         (parallel [(const_int 2)
+                                    (const_int 0)
+                                    (const_int 3)
+                                    (const_int 1)]))
+        (const_int 5)))]
+  "TARGET_SSE"
+  "unpcklps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE min/max
+
+(define_insn "smaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "maxps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsmaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                                  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "maxss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "minps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                                  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+                        (match_dup 1)
+                       (const_int 1)))]
+  "TARGET_SSE"
+  "minss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE <-> integer/MMX conversions
+
+(define_insn "cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                       (vec_duplicate:V4SF
+                        (float:V2SF (match_operand:V2SI 2 "register_operand" "ym")))
+                       (const_int 12)))]
+  "TARGET_SSE"
+  "cvtpi2ps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
+                        (parallel
+                         [(const_int 0)
+                          (const_int 1)])))]
+  "TARGET_SSE"
+  "cvtps2pi\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvttps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
+                        (parallel
+                         [(const_int 0)
+                          (const_int 1)])))]
+  "TARGET_SSE"
+  "cvttps2pi\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtsi2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+       (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                       (vec_duplicate:V4SF
+                        (float:SF (match_operand:SI 2 "register_operand" "rm")))
+                       (const_int 15)))]
+  "TARGET_SSE"
+  "cvtsi2ss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvtss2si"
+  [(set (match_operand:SI 0 "register_operand" "=y")
+       (vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
+                      (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "cvtss2si\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "cvttss2si"
+  [(set (match_operand:SI 0 "register_operand" "=y")
+       (vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
+                      (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "cvttss2si\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX insns
+
+;; MMX arithmetic
+
+(define_insn "addv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "addv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "addv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                  (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ssaddv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                     (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddsb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ssaddv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "usaddv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                     (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "usaddv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddusw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (minus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                   (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "sssubv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubsb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "sssubv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ussubv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ussubv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubusw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (mult:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pmullw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "smulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (truncate:V4HI
+        (lshiftrt:V4SI
+         (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
+                    (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+         (const_int 16))))]
+  "TARGET_MMX"
+  "pmulhw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "umulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (truncate:V4HI
+        (lshiftrt:V4SI
+         (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
+                    (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+         (const_int 16))))]
+  "TARGET_MMX"
+  "pmulhuw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_pmaddwd"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (plus:V2SI
+        (mult:V2SI
+         (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
+                                            (parallel [(const_int 0)
+                                                       (const_int 2)])))
+         (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+                                            (parallel [(const_int 0)
+                                                       (const_int 2)]))))
+        (mult:V2SI
+         (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
+                                            (parallel [(const_int 1)
+                                                       (const_int 3)])))
+         (sign_extend:V2SI (vec_select:V2HI (match_dup 2)
+                                            (parallel [(const_int 1)
+                                                       (const_int 3)]))))))]
+  "TARGET_MMX"
+  "pmaddwd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX logical operations
+;; Note we don't want to declare these as regular iordi3 insns to prevent
+;; normal code that also wants to use the FPU from getting broken.
+;; The UNSPECs are there to prevent the combiner from getting overly clever.
+(define_insn "mmx_iordi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(ior:DI (match_operand:DI 1 "register_operand" "0")
+                 (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "por\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
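Because these deliberately avoid the standard iordi3/xordi3/anddi3 names, the middle end never expands ordinary DImode logic to them; they are reached only through explicit calls, roughly as in this sketch (illustrative, not from the patch):

    /* Emit a 64-bit MMX OR directly.  The unspec wrapper keeps the
       combiner from rewriting it into integer-register rtl.  */
    rtx dst = gen_reg_rtx (DImode);
    rtx a   = gen_reg_rtx (DImode);
    rtx b   = gen_reg_rtx (DImode);

    emit_insn (gen_mmx_iordi3 (dst, a, b));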
+(define_insn "mmx_xordi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(xor:DI (match_operand:DI 1 "register_operand" "0")
+                 (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pxor\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Same as pxor, but don't show input operands so that we don't think
+;; they are live.
+(define_insn "mmx_clrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI [(const_int 0)] 45))]
+  "TARGET_MMX"
+  "pxor\\t{%0, %0|%0, %0}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_anddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(and:DI (match_operand:DI 1 "register_operand" "0")
+                 (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pand\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_nanddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+        [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
+                         (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pandn\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX unsigned averages/sum of absolute differences
+
+(define_insn "mmx_uavgv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (ashiftrt:V8QI
+        (plus:V8QI (plus:V8QI
+                    (match_operand:V8QI 1 "register_operand" "0")
+                    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+                   (vec_const:V8QI (parallel [(const_int 1)
+                                              (const_int 1)
+                                              (const_int 1)
+                                              (const_int 1)
+                                              (const_int 1)
+                                              (const_int 1)
+                                              (const_int 1)
+                                              (const_int 1)])))
+        (const_int 1)))]
+  "TARGET_SSE"
+  "pavgbn\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mmx_uavgv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ashiftrt:V4HI
+        (plus:V4HI (plus:V4HI
+                    (match_operand:V4HI 1 "register_operand" "0")
+                    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+                   (vec_const:V4HI (parallel [(const_int 1)
+                                              (const_int 1)
+                                              (const_int 1)
+                                              (const_int 1)])))
+        (const_int 1)))]
+  "TARGET_SSE"
+  "pavgwn\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mmx_psadbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (abs:V8QI (minus:V8QI (match_operand:DI 1 "register_operand" "0")
+                             (match_operand:DI 2 "nonimmediate_operand" "ym"))))]
+  "TARGET_SSE"
+  "padbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX insert/extract/shuffle
+
+(define_insn "mmx_pinsrw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                       (vec_duplicate:V4HI
+                        (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
+                       (match_operand:SI 3 "immediate_operand" "i")))]
+  "TARGET_SSE"
+  ;; @@@ check operand order for intel/nonintel syntax.
+  "pinsrw\\t%3, {%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mmx_pextrw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
+                                      (parallel
+                                       [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_SSE"
+  ;; @@@ check operand order for intel/nonintel syntax.
+  "pextrw\\t%2, {%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mmx_pshufw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
+                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+                     (match_operand:SI 3 "immediate_operand" "i")] 41))]
+  "TARGET_SSE"
+  ;; @@@ check operand order for intel/nonintel syntax
+  "pshufw\\t %3,{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX mask-generating comparisons
+
+(define_insn "eqv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (eq:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "eqv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (eq:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "eqv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (eq:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gtv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (gt:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gtv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (gt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gtv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (gt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX max/min insns
+
+(define_insn "umaxv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pmaxub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "smaxv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pmaxsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "uminv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pminub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sminv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pminsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX shifts
+
+(define_insn "ashrv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psraw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ashrv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrad\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "lshrv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrlw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "lshrv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrld\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; See logical MMX insns.
+(define_insn "mmx_lshrdi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+                    (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrlq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ashlv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psllw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ashlv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "pslld\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; See logical MMX insns.
+(define_insn "mmx_ashldi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (ashift:DI (match_operand:DI 1 "register_operand" "0")
+                  (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psllq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX pack/unpack insns.
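+;; ss_truncate and us_truncate model the signed and unsigned saturation
+;; performed while narrowing the elements.  The unpack patterns are
+;; written as a vec_merge of two vec_selects; the const_int mask says
+;; which operand supplies each result element (a set bit selects from
+;; the first vec_select).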
+
+(define_insn "mmx_packsswb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+       (vec_concat:V8QI
+        (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
+        (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packsswb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_packssdw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (vec_concat:V4HI
+        (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0"))
+        (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packssdw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_packuswb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+       (vec_concat:V8QI
+        (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
+        (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packuswb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
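+;; For example (illustrative values): with %0 = {a0,...,a7} and
+;; %2 = {b0,...,b7}, punpckhbw leaves %0 = {a4,b4,a5,b5,a6,b6,a7,b7};
+;; the mask 85 (0b01010101) takes the even result positions from the
+;; first vec_select and the odd ones from the second.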
+(define_insn "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+       (vec_merge:V8QI
+        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                         (parallel [(const_int 4)
+                                    (const_int 0)
+                                    (const_int 5)
+                                    (const_int 1)
+                                    (const_int 6)
+                                    (const_int 2)
+                                    (const_int 7)
+                                    (const_int 3)]))
+        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 4)
+                                    (const_int 1)
+                                    (const_int 5)
+                                    (const_int 2)
+                                    (const_int 6)
+                                    (const_int 3)
+                                    (const_int 7)]))
+        (const_int 85)))]
+  "TARGET_MMX"
+  "punpckhbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpckhwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (vec_merge:V4HI
+        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                         (parallel [(const_int 0)
+                                    (const_int 2)
+                                    (const_int 1)
+                                    (const_int 3)]))
+        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+                         (parallel [(const_int 2)
+                                    (const_int 0)
+                                    (const_int 3)
+                                    (const_int 1)]))
+        (const_int 5)))]
+  "TARGET_MMX"
+  "punpckhbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpckhdq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_merge:V2SI
+        (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                         (parallel [(const_int 0)
+                                    (const_int 1)]))
+        (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+                         (parallel [(const_int 1)
+                                    (const_int 0)]))
+        (const_int 1)))]
+  "TARGET_MMX"
+  "punpckhbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpcklbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+       (vec_merge:V8QI
+        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
+                         (parallel [(const_int 0)
+                                    (const_int 4)
+                                    (const_int 1)
+                                    (const_int 5)
+                                    (const_int 2)
+                                    (const_int 6)
+                                    (const_int 3)
+                                    (const_int 7)]))
+        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+                         (parallel [(const_int 4)
+                                    (const_int 0)
+                                    (const_int 5)
+                                    (const_int 1)
+                                    (const_int 6)
+                                    (const_int 2)
+                                    (const_int 7)
+                                    (const_int 3)]))
+        (const_int 85)))]
+  "TARGET_MMX"
+  "punpcklbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpcklwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (vec_merge:V4HI
+        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
+                         (parallel [(const_int 2)
+                                    (const_int 0)
+                                    (const_int 3)
+                                    (const_int 1)]))
+        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 2)
+                                    (const_int 1)
+                                    (const_int 3)]))
+        (const_int 5)))]
+  "TARGET_MMX"
+  "punpcklbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpckldq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_merge:V2SI
+        (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
+                          (parallel [(const_int 1)
+                                     (const_int 0)]))
+        (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 1)]))
+        (const_int 1)))]
+  "TARGET_MMX"
+  "punpcklbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; Miscellaneous stuff
+
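+;; emms marks every x87 stack slot empty, so the pattern clobbers the
+;; x87 stack registers and the MMX registers to keep live values out of
+;; them across the instruction.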
+(define_insn "emms"
+  [(unspec_volatile [(const_int 0)] 31)
+   (clobber (reg:XF 8))
+   (clobber (reg:XF 9))
+   (clobber (reg:XF 10))
+   (clobber (reg:XF 11))
+   (clobber (reg:XF 12))
+   (clobber (reg:XF 13))
+   (clobber (reg:XF 14))
+   (clobber (reg:XF 15))
+   (clobber (reg:DI 27))
+   (clobber (reg:DI 28))
+   (clobber (reg:DI 29))
+   (clobber (reg:DI 30))
+   (clobber (reg:DI 31))
+   (clobber (reg:DI 32))
+   (clobber (reg:DI 33))
+   (clobber (reg:DI 34))]
+  "TARGET_MMX"
+  "emms"
+  [(set_attr "type" "mmx")])
+
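+;; ldmxcsr and stmxcsr load and store the SSE control/status register
+;; (MXCSR) and are only available with SSE.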
+(define_insn "ldmxcsr"
+  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)]
+  "TARGET_MMX"
+  "ldmxcsr\\t%0"
+  [(set_attr "type" "mmx")])
+
+(define_insn "stmxcsr"
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+       (unspec_volatile:SI [(const_int 0)] 40))]
+  "TARGET_MMX"
+  "stmxcsr\\t%0"
+  [(set_attr "type" "mmx")])
+
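+;; sfence is modelled as a store to a volatile BLKmode scratch MEM so
+;; that memory accesses are not moved across it.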
+(define_expand "sfence"
+  [(set (match_dup 0)
+       (unspec:BLK [(match_dup 0)] 44))]
+  "TARGET_SSE"
+  "
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+}")
+
+(define_insn "*sfence_insn"
+  [(set (match_operand:BLK 0 "" "")
+       (unspec:BLK [(match_dup 0)] 44))]
+  "TARGET_SSE"
+  "sfence"
+  [(set_attr "type" "sse")])
+
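+;; Operand 1 is the locality hint and must be a compile-time constant;
+;; values 0-2 select prefetcht0/t1/t2 and 3 selects prefetchnta.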
+(define_insn "prefetch"
+  [(unspec [(match_operand:SI 0 "address_operand" "p")
+           (match_operand:SI 1 "const_int_operand" "n")] 35)]
+  "TARGET_SSE"
+  "*
+{
+  switch (INTVAL (operands[1]))
+    {
+    case 0:
+      return \"prefetcht0\\t%0\";
+    case 1:
+      return \"prefetcht1\\t%0\";
+    case 2:
+      return \"prefetcht2\\t%0\";
+    case 3:
+      return \"prefetchnta\\t%0\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+