]> git.ipfire.org Git - thirdparty/gcc.git/blobdiff - gcc/config/i386/i386.md
PR target/62055
[thirdparty/gcc.git] / gcc / config / i386 / i386.md
index e08b2b7c14bfedb288651f238e7ca115fafbcacf..2b7df20813e4196b3c1c16d33aa1f18d2b9820c7 100644 (file)
@@ -1,5 +1,5 @@
 ;; GCC machine description for IA-32 and x86-64.
-;; Copyright (C) 1988-2018 Free Software Foundation, Inc.
+;; Copyright (C) 1988-2019 Free Software Foundation, Inc.
 ;; Mostly by William Schelter.
 ;; x86_64 support added by Jan Hubicka
 ;;
   UNSPEC_NOTRAP
   UNSPEC_PARITY
   UNSPEC_FSTCW
-  UNSPEC_FLDCW
   UNSPEC_REP
   UNSPEC_LD_MPIC       ; load_macho_picbase
   UNSPEC_TRUNC_NOOP
 
   ;; Generic math support
   UNSPEC_COPYSIGN
+  UNSPEC_XORSIGN
   UNSPEC_IEEE_MIN      ; not commutative
   UNSPEC_IEEE_MAX      ; not commutative
 
   UNSPEC_FRNDINT_FLOOR
   UNSPEC_FRNDINT_CEIL
   UNSPEC_FRNDINT_TRUNC
-  UNSPEC_FRNDINT_MASK_PM
   UNSPEC_FIST_FLOOR
   UNSPEC_FIST_CEIL
 
 
   ;; For Speculation Barrier support
   UNSPECV_SPECULATION_BARRIER
+
+  UNSPECV_PTWRITE
+
+  ;; For ENQCMD and ENQCMDS support
+  UNSPECV_ENQCMD
+  UNSPECV_ENQCMDS
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
    (ARGP_REG                   16)
    (FLAGS_REG                  17)
    (FPSR_REG                   18)
-   (FPCR_REG                   19)
-   (FRAME_REG                  20)
-   (XMM0_REG                   21)
-   (XMM1_REG                   22)
-   (XMM2_REG                   23)
-   (XMM3_REG                   24)
-   (XMM4_REG                   25)
-   (XMM5_REG                   26)
-   (XMM6_REG                   27)
-   (XMM7_REG                   28)
-   (MM0_REG                    29)
-   (MM1_REG                    30)
-   (MM2_REG                    31)
-   (MM3_REG                    32)
-   (MM4_REG                    33)
-   (MM5_REG                    34)
-   (MM6_REG                    35)
-   (MM7_REG                    36)
-   (R8_REG                     37)
-   (R9_REG                     38)
-   (R10_REG                    39)
-   (R11_REG                    40)
-   (R12_REG                    41)
-   (R13_REG                    42)
-   (R14_REG                    43)
-   (R15_REG                    44)
-   (XMM8_REG                   45)
-   (XMM9_REG                   46)
-   (XMM10_REG                  47)
-   (XMM11_REG                  48)
-   (XMM12_REG                  49)
-   (XMM13_REG                  50)
-   (XMM14_REG                  51)
-   (XMM15_REG                  52)
-   (XMM16_REG                  53)
-   (XMM17_REG                  54)
-   (XMM18_REG                  55)
-   (XMM19_REG                  56)
-   (XMM20_REG                  57)
-   (XMM21_REG                  58)
-   (XMM22_REG                  59)
-   (XMM23_REG                  60)
-   (XMM24_REG                  61)
-   (XMM25_REG                  62)
-   (XMM26_REG                  63)
-   (XMM27_REG                  64)
-   (XMM28_REG                  65)
-   (XMM29_REG                  66)
-   (XMM30_REG                  67)
-   (XMM31_REG                  68)
-   (MASK0_REG                  69)
-   (MASK1_REG                  70)
-   (MASK2_REG                  71)
-   (MASK3_REG                  72)
-   (MASK4_REG                  73)
-   (MASK5_REG                  74)
-   (MASK6_REG                  75)
-   (MASK7_REG                  76)
-   (FIRST_PSEUDO_REG           77)
+   (FRAME_REG                  19)
+   (XMM0_REG                   20)
+   (XMM1_REG                   21)
+   (XMM2_REG                   22)
+   (XMM3_REG                   23)
+   (XMM4_REG                   24)
+   (XMM5_REG                   25)
+   (XMM6_REG                   26)
+   (XMM7_REG                   27)
+   (MM0_REG                    28)
+   (MM1_REG                    29)
+   (MM2_REG                    30)
+   (MM3_REG                    31)
+   (MM4_REG                    32)
+   (MM5_REG                    33)
+   (MM6_REG                    34)
+   (MM7_REG                    35)
+   (R8_REG                     36)
+   (R9_REG                     37)
+   (R10_REG                    38)
+   (R11_REG                    39)
+   (R12_REG                    40)
+   (R13_REG                    41)
+   (R14_REG                    42)
+   (R15_REG                    43)
+   (XMM8_REG                   44)
+   (XMM9_REG                   45)
+   (XMM10_REG                  46)
+   (XMM11_REG                  47)
+   (XMM12_REG                  48)
+   (XMM13_REG                  49)
+   (XMM14_REG                  50)
+   (XMM15_REG                  51)
+   (XMM16_REG                  52)
+   (XMM17_REG                  53)
+   (XMM18_REG                  54)
+   (XMM19_REG                  55)
+   (XMM20_REG                  56)
+   (XMM21_REG                  57)
+   (XMM22_REG                  58)
+   (XMM23_REG                  59)
+   (XMM24_REG                  60)
+   (XMM25_REG                  61)
+   (XMM26_REG                  62)
+   (XMM27_REG                  63)
+   (XMM28_REG                  64)
+   (XMM29_REG                  65)
+   (XMM30_REG                  66)
+   (XMM31_REG                  67)
+   (MASK0_REG                  68)
+   (MASK1_REG                  69)
+   (MASK2_REG                  70)
+   (MASK3_REG                  71)
+   (MASK4_REG                  72)
+   (MASK5_REG                  73)
+   (MASK6_REG                  74)
+   (MASK7_REG                  75)
+   (FIRST_PSEUDO_REG           76)
   ])
 
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
 ;; Processor type.
 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
                    atom,slm,glm,haswell,generic,amdfam10,bdver1,bdver2,bdver3,
-                   bdver4,btver2,znver1"
+                   bdver4,btver2,znver1,znver2"
   (const (symbol_ref "ix86_schedule")))
 
 ;; A basic instruction type.  Refinements due to arguments to be
 
 ;; Defines rounding mode of an FP operation.
 
-(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
+(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any"
   (const_string "any"))
 
+;; Define attribute to indicate AVX insns with partial XMM register update.
+(define_attr "avx_partial_xmm_update" "false,true"
+  (const_string "false"))
+
 ;; Define attribute to classify add/sub insns that consumes carry flag (CF)
 (define_attr "use_carry" "0,1" (const_string "0"))
 
                    avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
+  (const_string "base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
         (eq_attr "isa" "x64_sse2")
         (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
         (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
         (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+        (eq_attr "mmx_isa" "native")
+          (symbol_ref "!TARGET_MMX_WITH_SSE")
+        (eq_attr "mmx_isa" "x64")
+          (symbol_ref "TARGET_MMX_WITH_SSE")
+        (eq_attr "mmx_isa" "x64_avx")
+          (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+        (eq_attr "mmx_isa" "x64_noavx")
+          (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
        ]
        (const_int 1)))
 
 (define_code_iterator absneg [abs neg])
 
 ;; Base name for x87 insn mnemonic.
-(define_code_attr absneg_mnemonic [(abs "abs") (neg "chs")])
+(define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
 
 ;; Used in signed and unsigned widening multiplications.
 (define_code_iterator any_extend [sign_extend zero_extend])
 
 ;; Prefix for insn mnemonic.
-(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")])
-
+(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
+                            (div "i") (udiv "")])
 ;; Prefix for define_insn
-(define_code_attr u [(sign_extend "") (zero_extend "u")])
 (define_code_attr s [(sign_extend "s") (zero_extend "u")])
-(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")])
+(define_code_attr u [(sign_extend "") (zero_extend "u")
+                    (div "") (udiv "u")])
+(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
+                         (div "false") (udiv "true")])
 
 ;; Used in signed and unsigned truncations.
 (define_code_iterator any_truncate [ss_truncate truncate us_truncate])
                               (HI "TARGET_HIMODE_MATH")
                               SI])
 
-;; Math-dependant integer modes with DImode.
-(define_mode_iterator SWIM1248x [(QI "TARGET_QIMODE_MATH")
-                                (HI "TARGET_HIMODE_MATH")
-                                SI (DI "(TARGET_STV && TARGET_SSE2) || TARGET_64BIT")])
+;; Math-dependent integer modes with DImode (enabled for 32bit with STV).
+(define_mode_iterator SWIM1248s
+       [(QI "TARGET_QIMODE_MATH")
+        (HI "TARGET_HIMODE_MATH")
+        SI (DI "TARGET_64BIT || (TARGET_STV && TARGET_SSE2)")])
 
 ;; Math-dependent single word integer modes without QImode.
 (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
 ;; All x87 floating point modes
 (define_mode_iterator X87MODEF [SF DF XF])
 
+;; All SSE floating point modes
+(define_mode_iterator SSEMODEF [SF DF TF])
+;; Mode name paired with each SSEMODEF mode: V4SF for SF, V2DF for DF,
+;; and TF itself for TF.
+(define_mode_attr ssevecmodef [(SF "V4SF") (DF "V2DF") (TF "TF")])
+
 ;; SSE instruction suffix for various modes
 (define_mode_attr ssemodesuffix
   [(SF "ss") (DF "sd")
   [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])
 
 ;; Instruction suffix for REX 64bit operators.
-(define_mode_attr rex64suffix [(SI "") (DI "{q}")])
+(define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
 (define_mode_attr rex64namesuffix [(SI "") (DI "q")])
 
 ;; This mode iterator allows :P to be used for patterns that operate on
        (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
                    (match_operand:SWI48 1 "<general_operand>")))])
 
+;; Integer modes iterated over by *cmp<mode>_ccz_1.  QI is enabled by
+;; TARGET_AVX512DQ, HI carries no per-mode condition, SI is enabled by
+;; TARGET_AVX512BW, and DI by TARGET_AVX512BW && TARGET_64BIT.
-(define_mode_iterator SWI1248_AVX512BWDQ2_64
-  [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
+(define_mode_iterator SWI1248_AVX512BWDQ_64
+  [(QI "TARGET_AVX512DQ") HI
    (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
 
 (define_insn "*cmp<mode>_ccz_1"
   [(set (reg FLAGS_REG)
-       (compare (match_operand:SWI1248_AVX512BWDQ2_64 0
+       (compare (match_operand:SWI1248_AVX512BWDQ_64 0
                        "nonimmediate_operand" "<r>,?m<r>,$k")
-                (match_operand:SWI1248_AVX512BWDQ2_64 1 "const0_operand")))]
-  "ix86_match_ccmode (insn, CCZmode)"
+                (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
+  "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
   "@
    test{<imodesuffix>}\t%0, %0
    cmp{<imodesuffix>}\t{%1, %0|%0, %1}
-   ktest<mskmodesuffix>\t%0, %0"
+   kortest<mskmodesuffix>\t%0, %0"
   [(set_attr "type" "test,icmp,msklog")
    (set_attr "length_immediate" "0,1,*")
    (set_attr "prefix" "*,*,vex")
   DONE;
 })
 
-
 ;; FP compares, step 1:
-;; Set the FP condition codes.
-
-;; We may not use "#" to split and emit these, since the REG_DEAD notes
-;; used to manage the reg stack popping would not be preserved.
-
-(define_insn "*cmp<mode>_0_i387"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-       (unspec:HI
-         [(compare:CCFP
-            (match_operand:X87MODEF 1 "register_operand" "f")
-            (match_operand:X87MODEF 2 "const0_operand"))]
-       UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "* return output_fp_compare (insn, operands, false, false);"
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
+;; Set the FP condition codes and move fpsr to ax.
 
-(define_insn_and_split "*cmp<mode>_0_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP
-         (match_operand:X87MODEF 1 "register_operand" "f")
-         (match_operand:X87MODEF 2 "const0_operand")))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-       (unspec:HI
-         [(compare:CCFP (match_dup 1)(match_dup 2))]
-       UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
+;; We may not use "#" to split and emit these
+;; due to reg-stack pops killing fpsr.
 
 (define_insn "*cmpxf_i387"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
             (match_operand:XF 1 "register_operand" "f")
-            (match_operand:XF 2 "register_operand" "f"))]
+            (match_operand:XF 2 "reg_or_0_operand" "fC"))]
          UNSPEC_FNSTSW))]
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, false, false);"
    (set_attr "unit" "i387")
    (set_attr "mode" "XF")])
 
-(define_insn_and_split "*cmpxf_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP
-         (match_operand:XF 1 "register_operand" "f")
-         (match_operand:XF 2 "register_operand" "f")))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-       (unspec:HI
-         [(compare:CCFP (match_dup 1)(match_dup 2))]
-       UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "XF")])
-
 (define_insn "*cmp<mode>_i387"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
             (match_operand:MODEF 1 "register_operand" "f")
-            (match_operand:MODEF 2 "nonimmediate_operand" "fm"))]
+            (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
          UNSPEC_FNSTSW))]
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, false, false);"
    (set_attr "unit" "i387")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*cmp<mode>_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP
-         (match_operand:MODEF 1 "register_operand" "f")
-         (match_operand:MODEF 2 "nonimmediate_operand" "fm")))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-       (unspec:HI
-         [(compare:CCFP (match_dup 1)(match_dup 2))]
-       UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*cmpu<mode>_i387"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-       (unspec:HI
-         [(unspec:CCFP
-            [(compare:CCFP
-               (match_operand:X87MODEF 1 "register_operand" "f")
-               (match_operand:X87MODEF 2 "register_operand" "f"))]
-            UNSPEC_NOTRAP)]
-         UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "* return output_fp_compare (insn, operands, false, true);"
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn_and_split "*cmpu<mode>_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-       (unspec:CCFP
-         [(compare:CCFP
-            (match_operand:X87MODEF 1 "register_operand" "f")
-            (match_operand:X87MODEF 2 "register_operand" "f"))]
-         UNSPEC_NOTRAP))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-       (unspec:HI
-         [(unspec:CCFP
-            [(compare:CCFP (match_dup 1)(match_dup 2))]
-            UNSPEC_NOTRAP)]
-         UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
 (define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
             (match_operand:X87MODEF 1 "register_operand" "f")
             (float:X87MODEF
-              (match_operand:SWI24 2 "memory_operand" "m")))]
+              (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
          UNSPEC_FNSTSW))]
   "TARGET_80387
    && (TARGET_USE_<SWI24:MODE>MODE_FIOP
    (set_attr "fp_int_src" "true")
    (set_attr "mode" "<SWI24:MODE>")])
 
-(define_insn_and_split "*cmp<X87MODEF:mode>_<SWI24:mode>_cc_i387"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP
-         (match_operand:X87MODEF 1 "register_operand" "f")
-         (float:X87MODEF
-           (match_operand:SWI24 2 "memory_operand" "m"))))
-   (clobber (match_operand:HI 0 "register_operand" "=a"))]
-  "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE
-   && (TARGET_USE_<SWI24:MODE>MODE_FIOP
-       || optimize_function_for_size_p (cfun))"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
+(define_insn "*cmpu<mode>_i387"
+  [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
-         [(compare:CCFP
-            (match_dup 1)
-            (float:X87MODEF (match_dup 2)))]
-       UNSPEC_FNSTSW))
-   (set (reg:CC FLAGS_REG)
-       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
-  ""
+         [(unspec:CCFP
+            [(compare:CCFP
+               (match_operand:X87MODEF 1 "register_operand" "f")
+               (match_operand:X87MODEF 2 "register_operand" "f"))]
+            UNSPEC_NOTRAP)]
+         UNSPEC_FNSTSW))]
+  "TARGET_80387"
+  "* return output_fp_compare (insn, operands, false, true);"
   [(set_attr "type" "multi")
    (set_attr "unit" "i387")
-   (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<SWI24:MODE>")])
-
-;; FP compares, step 2
-;; Move the fpsw to ax.
-
-(define_insn "x86_fnstsw_1"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-       (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "fnstsw\t%0"
-  [(set_attr "length" "2")
-   (set_attr "mode" "SI")
-   (set_attr "unit" "i387")])
+   (set_attr "mode" "<MODE>")])
 
-;; FP compares, step 3
+;; FP compares, step 2:
 ;; Get ax into flags, general case.
 
 (define_insn "x86_sahf_1"
    (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "SI")])
 
-;; Pentium Pro can do steps 1 through 3 in one go.
+;; Pentium Pro can do both steps in one go.
 ;; (these instructions set flags directly)
 
 (define_subst_attr "unord" "unord_subst" "" "u")
          [(match_dup 1)]
          UNSPEC_NOTRAP))])
 
+;; XFmode compare that sets FLAGS_REG directly, in CCFPmode.  Both
+;; operands are register operands with the "f" constraint.  The pattern
+;; is enabled only when TARGET_80387 && TARGET_CMOVE.  The assembly text
+;; is produced by output_fp_compare, called with its third argument true
+;; and the <unordered> substitution as its fourth.
+(define_insn "*cmpi<unord>xf_i387"
+  [(set (reg:CCFP FLAGS_REG)
+       (compare:CCFP
+         (match_operand:XF 0 "register_operand" "f")
+         (match_operand:XF 1 "register_operand" "f")))]
+  "TARGET_80387 && TARGET_CMOVE"
+  "* return output_fp_compare (insn, operands, true, <unordered>);"
+  [(set_attr "type" "fcmp")
+   (set_attr "mode" "XF")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "znver1_decode" "double")])
+
 (define_insn "*cmpi<unord><MODEF:mode>"
   [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
         (eq_attr "alternative" "0")
         (symbol_ref "true")
         (symbol_ref "false"))))])
-
-(define_insn "*cmpi<unord>xf_i387"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP
-         (match_operand:XF 0 "register_operand" "f")
-         (match_operand:XF 1 "register_operand" "f")))]
-  "TARGET_80387 && TARGET_CMOVE"
-  "* return output_fp_compare (insn, operands, true, <unordered>);"
-  [(set_attr "type" "fcmp")
-   (set_attr "mode" "XF")
-   (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "direct")
-   (set_attr "bdver1_decode" "double")
-   (set_attr "znver1_decode" "double")])
 \f
 ;; Push/pop instructions.
 
 
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r ,?*Yd,?r,?*v,?*y,?*x,*k,*k ,*r,*m")
+    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r ,?*Yd,?r,?*v,?*y,?*x,*k,*k ,*r,*m,*k")
        (match_operand:DI 1 "general_operand"
-    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,v,*Yd,r   ,*v,r  ,*x ,*y ,*r,*km,*k,*k"))]
+    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,v,*Yd,r   ,*v,r  ,*x ,*y ,*r,*km,*k,*k,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
     case TYPE_MSKMOV:
       return "kmovq\t{%1, %0|%0, %1}";
 
+    case TYPE_MSKLOG:
+      if (operands[1] == const0_rtx)
+       return "kxorq\t%0, %0, %0";
+      else if (operands[1] == constm1_rtx)
+       return "kxnorq\t%0, %0, %0";
+      gcc_unreachable ();
+
     case TYPE_MULTI:
       return "#";
 
              (const_string "ssecvt")
            (eq_attr "alternative" "23,24,25,26")
              (const_string "mskmov")
+           (eq_attr "alternative" "27")
+             (const_string "msklog")
            (and (match_operand 0 "register_operand")
                 (match_operand 1 "pic_32bit_operand"))
              (const_string "lea")
 
 (define_insn "*movsi_internal"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm")
+    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm,*k")
        (match_operand:SI 1 "general_operand"
-    "g ,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,*v,r  ,*r,*km,*k"))]
+    "g ,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,*v,r  ,*r,*km,*k ,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
     case TYPE_MSKMOV:
       return "kmovd\t{%1, %0|%0, %1}";
 
+    case TYPE_MSKLOG:
+      if (operands[1] == const0_rtx)
+       return "kxord\t%0, %0, %0";
+      else if (operands[1] == constm1_rtx)
+       return "kxnord\t%0, %0, %0";
+      gcc_unreachable ();
+
     case TYPE_SSEMOV:
       switch (get_attr_mode (insn))
        {
              (const_string "ssemov")
            (eq_attr "alternative" "14,15,16")
              (const_string "mskmov")
+           (eq_attr "alternative" "17")
+             (const_string "msklog")
            (and (match_operand 0 "register_operand")
                 (match_operand 1 "pic_32bit_operand"))
              (const_string "lea")
           (symbol_ref "true")))])
 
 (define_insn "*movhi_internal"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m")
-       (match_operand:HI 1 "general_operand"      "r ,rn,rm,rn,r,km,k,k"))]
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m,k")
+       (match_operand:HI 1 "general_operand"      "r ,rn,rm,rn,r,km,k,k,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
          gcc_unreachable ();
        }
 
+    case TYPE_MSKLOG:
+      if (operands[1] == const0_rtx)
+       return "kxorw\t%0, %0, %0";
+      else if (operands[1] == constm1_rtx)
+       return "kxnorw\t%0, %0, %0";
+      gcc_unreachable ();
+
     default:
       if (get_attr_mode (insn) == MODE_SI)
        return "mov{l}\t{%k1, %k0|%k0, %k1}";
   [(set (attr "type")
      (cond [(eq_attr "alternative" "4,5,6,7")
              (const_string "mskmov")
+           (eq_attr "alternative" "8")
+             (const_string "msklog")
            (match_test "optimize_function_for_size_p (cfun)")
              (const_string "imov")
            (and (eq_attr "alternative" "0")
           ]
           (const_string "imov")))
     (set (attr "prefix")
-      (if_then_else (eq_attr "alternative" "4,5,6,7")
+      (if_then_else (eq_attr "alternative" "4,5,6,7,8")
        (const_string "vex")
        (const_string "orig")))
     (set (attr "mode")
 
 (define_insn "*movqi_internal"
   [(set (match_operand:QI 0 "nonimmediate_operand"
-                       "=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k")
+                       "=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k,k,k")
        (match_operand:QI 1 "general_operand"
-                       "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m"))]
+                       "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
       suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";
 
       snprintf (buf, sizeof (buf), ops, suffix);
-      return buf;
+      output_asm_insn (buf, operands);
+      return "";
+
+    case TYPE_MSKLOG:
+      if (operands[1] == const0_rtx)
+       {
+         if (get_attr_mode (insn) == MODE_HI)
+           return "kxorw\t%0, %0, %0";
+         else
+           return "kxorb\t%0, %0, %0";
+       }
+      else if (operands[1] == constm1_rtx)
+       {
+         gcc_assert (TARGET_AVX512DQ);
+         return "kxnorb\t%0, %0, %0";
+       }
+      gcc_unreachable ();
 
     default:
       if (get_attr_mode (insn) == MODE_SI)
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "1,2")
              (const_string "x64")
-           (eq_attr "alternative" "12,13")
+           (eq_attr "alternative" "12,13,15")
              (const_string "avx512dq")
           ]
           (const_string "*")))
    (set (attr "type")
      (cond [(eq_attr "alternative" "9,10,11,12,13")
              (const_string "mskmov")
+           (eq_attr "alternative" "14,15")
+             (const_string "msklog")
            (and (eq_attr "alternative" "7")
                 (not (match_operand:QI 1 "aligned_operand")))
              (const_string "imovx")
           ]
           (const_string "imov")))
    (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "9,10,11")
+     (if_then_else (eq_attr "alternative" "9,10,11,12,13,14,15")
        (const_string "vex")
        (const_string "orig")))
    (set (attr "mode")
               (const_string "SI")
             (eq_attr "alternative" "8")
               (const_string "QI")
-            (and (eq_attr "alternative" "9,10,11")
+            (and (eq_attr "alternative" "9,10,11,14")
                  (not (match_test "TARGET_AVX512DQ")))
               (const_string "HI")
             (eq_attr "type" "imovx")
 
 (define_insn "*zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-               "=r,?r,?o,r   ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r")
+               "=r,?r,?o,r   ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
        (zero_extend:DI
         (match_operand:SI 1 "x86_64_zext_operand"
-               "0 ,rm,r ,rmWz,0,r  ,m   ,v ,r ,m ,*x,*v,*k")))]
+               "0 ,rm,r ,rmWz,0,r  ,m   ,v ,r ,m ,*x,*v,*k,*km")))]
   ""
 {
   switch (get_attr_type (insn))
              (const_string "avx512f")
            (eq_attr "alternative" "12")
              (const_string "x64_avx512bw")
+           (eq_attr "alternative" "13")
+             (const_string "avx512bw")
           ]
           (const_string "*")))
+   (set (attr "mmx_isa")
+     (if_then_else (eq_attr "alternative" "5,6")
+                  (const_string "native")
+                  (const_string "*")))
    (set (attr "type")
      (cond [(eq_attr "alternative" "0,1,2,4")
              (const_string "multi")
                (const_string "multi"))
            (eq_attr "alternative" "8,9,10,11")
              (const_string "ssemov")
-           (eq_attr "alternative" "12")
+           (eq_attr "alternative" "12,13")
              (const_string "mskmov")
           ]
           (const_string "imovx")))
   [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
 
 ;; Zero-extend an SWI12-mode operand to DImode; the pattern is available
 ;; only when TARGET_64BIT.  Alternative 0 emits movz with an "r"
 ;; destination.  Alternatives 1 and 2 emit kmov from a "k" source (the
 ;; last one also accepts memory and has a "k" destination); both are
 ;; typed mskmov and take their "isa" value from <kmov_isa>.
 (define_insn "zero_extend<mode>di2"
-  [(set (match_operand:DI 0 "register_operand" "=r,*r")
+  [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
        (zero_extend:DI
-        (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k")))]
+        (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
   "TARGET_64BIT"
   "@
    movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
+   kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}
    kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
-  [(set_attr "isa" "*,<kmov_isa>")
-   (set_attr "type" "imovx,mskmov")
-   (set_attr "mode" "SI,<MODE>")])
+  [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
+   (set_attr "type" "imovx,mskmov,mskmov")
+   (set_attr "mode" "SI,<MODE>,<MODE>")])
 
 (define_expand "zero_extend<mode>si2"
   [(set (match_operand:SI 0 "register_operand")
    (set_attr "mode" "SI")])
 
 ;; Zero-extend an SWI12-mode operand to SImode.  The pattern is disabled
 ;; when TARGET_ZERO_EXTEND_WITH_AND holds and the function is optimized
 ;; for speed.  Alternative 0 emits movz with an "r" destination.
 ;; Alternatives 1 and 2 emit kmov from a "k" source (the last one also
 ;; accepts memory and has a "k" destination); both are typed mskmov and
 ;; take their "isa" value from <kmov_isa>.
 (define_insn "*zero_extend<mode>si2"
-  [(set (match_operand:SI 0 "register_operand" "=r,*r")
+  [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
        (zero_extend:SI
-         (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k")))]
+         (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
   "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
   "@
    movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
+   kmov<mskmodesuffix>\t{%1, %0|%0, %1}
    kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
-  [(set_attr "isa" "*,<kmov_isa>")
-   (set_attr "type" "imovx,mskmov")
-   (set_attr "mode" "SI,<MODE>")])
+  [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
+   (set_attr "type" "imovx,mskmov,mskmov")
+   (set_attr "mode" "SI,<MODE>,<MODE>")])
 
 (define_expand "zero_extendqihi2"
   [(set (match_operand:HI 0 "register_operand")
 
 ; zero extend to SImode to avoid partial register stalls
 ;; QImode-to-HImode zero extension.  The pattern is disabled when
 ;; TARGET_ZERO_EXTEND_WITH_AND holds and the function is optimized for
 ;; speed.  Alternative 0 emits movz and prints the destination as %k0.
 ;; Alternatives 1 and 2 emit kmovb from a "k" source and are marked
 ;; "avx512dq"; alternative 2 also accepts memory, has a "k" destination
 ;; and prints it as plain %0.
 (define_insn "*zero_extendqihi2"
-  [(set (match_operand:HI 0 "register_operand" "=r,*r")
-       (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k")))]
+  [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
+       (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
   "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
   "@
    movz{bl|x}\t{%1, %k0|%k0, %1}
-   kmovb\t{%1, %k0|%k0, %1}"
-  [(set_attr "isa" "*,avx512dq")
-   (set_attr "type" "imovx,mskmov")
-   (set_attr "mode" "SI,QI")])
-
-(define_insn_and_split "*zext<mode>_doubleword_and"
-  [(set (match_operand:DI 0 "register_operand" "=&<r>")
-       (zero_extend:DI (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
-  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
-   && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
-  "#"
-  "&& reload_completed && GENERAL_REG_P (operands[0])"
-  [(set (match_dup 2) (const_int 0))]
-{
-  split_double_mode (DImode, &operands[0], 1, &operands[0], &operands[2]);
-
-  emit_move_insn (operands[0], const0_rtx);
-
-  gcc_assert (!TARGET_PARTIAL_REG_STALL);
-  emit_insn (gen_movstrict<mode>
-            (gen_lowpart (<MODE>mode, operands[0]), operands[1]));
-})
-
-(define_insn_and_split "*zext<mode>_doubleword"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (zero_extend:DI (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
-  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
-   && !(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
-  "#"
-  "&& reload_completed && GENERAL_REG_P (operands[0])"
-  [(set (match_dup 0) (zero_extend:SI (match_dup 1)))
-   (set (match_dup 2) (const_int 0))]
-  "split_double_mode (DImode, &operands[0], 1, &operands[0], &operands[2]);")
-
-(define_insn_and_split "*zextsi_doubleword"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))]
-  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2"
-  "#"
-  "&& reload_completed && GENERAL_REG_P (operands[0])"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (const_int 0))]
-  "split_double_mode (DImode, &operands[0], 1, &operands[0], &operands[2]);")
+   kmovb\t{%1, %k0|%k0, %1}
+   kmovb\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "*,avx512dq,avx512dq")
+   (set_attr "type" "imovx,mskmov,mskmov")
+   (set_attr "mode" "SI,QI,QI")])
 \f
 ;; Sign extension instructions
 
 })
 
 (define_insn "*extendsfdf2"
-  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
         (float_extend:DF
-         (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
+         (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
   "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
 {
   switch (which_alternative)
       return output_387_reg_move (insn, operands);
 
     case 2:
+      return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
+    case 3:
       return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
 
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "fmov,fmov,ssecvt")
-   (set_attr "prefix" "orig,orig,maybe_vex")
-   (set_attr "mode" "SF,XF,DF")
+  [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
+   (set_attr "avx_partial_xmm_update" "false,false,false,true")
+   (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "SF,XF,DF,DF")
    (set (attr "enabled")
      (if_then_else
        (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
    (set (match_dup 0) (float_extend:DF (match_dup 2)))]
   "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
 
-;; Break partial reg stall for cvtss2sd.  This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
+;; Break partial SSE register dependency stall.  This splitter should split
+;; late in the pass sequence (after register rename pass), so allocated
+;; registers won't change anymore.
 
 (define_split
   [(set (match_operand:DF 0 "sse_reg_operand")
         (float_extend:DF
           (match_operand:SF 1 "nonimmediate_operand")))]
-  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+  "!TARGET_AVX
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
    && optimize_function_for_speed_p (cfun)
    && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))
+       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
    && (!EXT_REX_SSE_REG_P (operands[0])
        || TARGET_AVX512VL)"
   [(set (match_dup 0)
 ;; Conversion from DFmode to SFmode.
 
 (define_insn "truncdfsf2"
-  [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v")
+  [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
        (float_truncate:SF
-         (match_operand:DF 1 "register_ssemem_operand" "f,f,vm")))]
+         (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
   "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
 {
   switch (which_alternative)
       return output_387_reg_move (insn, operands);
 
     case 2:
+      return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
+    case 3:
       return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
 
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "fmov,fmov,ssecvt")
+  [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
+   (set_attr "avx_partial_xmm_update" "false,false,false,true")
    (set_attr "mode" "SF")
    (set (attr "enabled")
      (if_then_else
    (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
   "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
 
-;; Break partial reg stall for cvtsd2ss.  This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
+;; Break partial SSE register dependency stall.  This splitter should split
+;; late in the pass sequence (after register rename pass), so allocated
+;; registers won't change anymore.
 
 (define_split
   [(set (match_operand:SF 0 "sse_reg_operand")
         (float_truncate:SF
          (match_operand:DF 1 "nonimmediate_operand")))]
-  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+  "!TARGET_AVX
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
    && optimize_function_for_speed_p (cfun)
    && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))
+       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
    && (!EXT_REX_SSE_REG_P (operands[0])
        || TARGET_AVX512VL)"
   [(set (match_dup 0)
 (define_insn "fix_trunc<mode>_i387_fisttp"
   [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
        (fix:SWI248x (match_operand 1 "register_operand" "f")))
-   (clobber (match_scratch:XF 2 "=&1f"))]
+   (clobber (match_scratch:XF 2 "=&f"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && TARGET_FISTTP
    && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
        (fix:DI (match_operand 1 "register_operand" "f")))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))
-   (clobber (match_scratch:XF 4 "=&1f"))]
+   (clobber (match_scratch:XF 4 "=&f"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && !TARGET_FISTTP
    && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
 
 (define_insn "x86_fnstcw_1"
   [(set (match_operand:HI 0 "memory_operand" "=m")
-       (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))]
+       (unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
   "TARGET_80387"
   "fnstcw\t%0"
   [(set (attr "length")
    (set_attr "mode" "HI")
    (set_attr "unit" "i387")
    (set_attr "bdver1_decode" "vector")])
-
-(define_insn "x86_fldcw_1"
-  [(set (reg:HI FPCR_REG)
-       (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
-  "TARGET_80387"
-  "fldcw\t%0"
-  [(set (attr "length")
-       (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
-   (set_attr "mode" "HI")
-   (set_attr "unit" "i387")
-   (set_attr "athlon_decode" "vector")
-   (set_attr "amdfam10_decode" "vector")
-   (set_attr "bdver1_decode" "vector")])
 \f
 ;; Conversion between fixed point and floating point.
 
    %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
    %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "fmov,sseicvt,sseicvt")
+   (set_attr "avx_partial_xmm_update" "false,true,true")
    (set_attr "prefix" "orig,maybe_vex,maybe_vex")
    (set_attr "mode" "<MODEF:MODE>")
    (set (attr "prefix_rex")
 })
 
 (define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm"
-  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
        (float:X87MODEF
-         (match_operand:DI 1 "register_operand" "r")))
-   (clobber (match_scratch:V4SI 3 "=x"))
-   (clobber (match_scratch:V4SI 4 "=x"))
-   (clobber (match_operand:DI 2 "memory_operand" "=m"))]
+         (match_operand:DI 1 "register_operand" "r,r")))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+   (clobber (match_scratch:V4SI 3 "=x,x"))
+   (clobber (match_scratch:V4SI 4 "=X,x"))]
   "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
    && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
    && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
      Assemble the 64-bit DImode value in an xmm register.  */
   emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
                              gen_lowpart (SImode, operands[1])));
-  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
-                             gen_highpart (SImode, operands[1])));
-  emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
-                                        operands[4]));
-
+  if (TARGET_SSE4_1)
+    emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3],
+                                 gen_highpart (SImode, operands[1]),
+                                 GEN_INT (2)));
+  else
+    {
+      emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+                                 gen_highpart (SImode, operands[1])));
+      emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
+                                            operands[4]));
+    }
   operands[3] = gen_lowpart (DImode, operands[3]);
 }
-  [(set_attr "type" "multi")
+  [(set_attr "isa" "sse4,*")
+   (set_attr "type" "multi")
    (set_attr "mode" "<X87MODEF:MODE>")
    (set_attr "unit" "i387")
    (set_attr "fp_int_src" "true")])
 
-;; Avoid partial SSE register dependency stalls.  This splitter should split
+;; Break partial SSE register dependency stall.  This splitter should split
 ;; late in the pass sequence (after register rename pass), so allocated
 ;; registers won't change anymore
 
 (define_split
   [(set (match_operand:MODEF 0 "sse_reg_operand")
        (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
-  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+  "!TARGET_AVX
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
    && optimize_function_for_speed_p (cfun)
    && (!EXT_REX_SSE_REG_P (operands[0])
        || TARGET_AVX512VL)"
   [(set (match_operand:DF 0 "register_operand")
        (unsigned_float:DF
          (match_operand:DI 1 "nonimmediate_operand")))]
-  "(TARGET_KEEPS_VECTOR_ALIGNED_STACK || TARGET_AVX512F)
+  "((TARGET_64BIT && TARGET_AVX512F)
+    || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
    && TARGET_SSE2 && TARGET_SSE_MATH"
 {
   if (!TARGET_64BIT)
           (plus:SWI48 (match_dup 1) (match_dup 2)))])]
   "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)")
 
-(define_insn "sub<mode>3_carry"
+(define_insn "@sub<mode>3_carry"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
        (minus:SWI
          (minus:SWI
 \f
 ;; Divmod instructions.
 
-(define_expand "divmod<mode>4"
+(define_code_iterator any_div [div udiv])  ; lets one pattern cover both the div and udiv rtx codes
+(define_code_attr paired_mod [(div "mod") (udiv "umod")])  ; remainder code belonging to each any_div code
+
+(define_expand "<u>divmod<mode>4"  ; NOTE(review): <u> is defined outside this view; presumably yields divmod<mode>4 / udivmod<mode>4 -- confirm
   [(parallel [(set (match_operand:SWIM248 0 "register_operand")
-                  (div:SWIM248
+                  (any_div:SWIM248  ; operand 0 receives operand 1 divided by operand 2
                     (match_operand:SWIM248 1 "register_operand")
                     (match_operand:SWIM248 2 "nonimmediate_operand")))
              (set (match_operand:SWIM248 3 "register_operand")
-                  (mod:SWIM248 (match_dup 1) (match_dup 2)))
+                  (<paired_mod>:SWIM248 (match_dup 1) (match_dup 2)))  ; operand 3 receives the matching remainder
              (clobber (reg:CC FLAGS_REG))])])
 
 ;; Split with 8bit unsigned divide:
 ;;        use original integer divide
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
-       (div:SWI48 (match_operand:SWI48 2 "register_operand")
-                   (match_operand:SWI48 3 "nonimmediate_operand")))
+       (any_div:SWI48 (match_operand:SWI48 2 "register_operand")  ; matches the signed or the unsigned quotient (operand 0)
+                      (match_operand:SWI48 3 "nonimmediate_operand")))
    (set (match_operand:SWI48 1 "register_operand")
-       (mod:SWI48 (match_dup 2) (match_dup 3)))
+       (<paired_mod>:SWI48 (match_dup 2) (match_dup 3)))  ; remainder (operand 1) of the same dividend/divisor pair
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_8BIT_IDIV
    && TARGET_QIMODE_MATH
    && can_create_pseudo_p ()
    && !optimize_insn_for_size_p ()"
   [(const_int 0)]
-  "ix86_split_idivmod (<MODE>mode, operands, true); DONE;")
+  "ix86_split_idivmod (<MODE>mode, operands, <u_bool>); DONE;")  ; all RTL is emitted by ix86_split_idivmod. NOTE(review): <u_bool> is defined outside this view; the replaced line passed true for div -- confirm the polarity the helper now expects
 
 (define_split
   [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
-         (div:SI (match_operand:SI 2 "register_operand")
-                 (match_operand:SI 3 "nonimmediate_operand"))))
+         (any_div:SI (match_operand:SI 2 "register_operand")  ; SImode quotient, signed or unsigned, zero-extended into DImode operand 0
+                     (match_operand:SI 3 "nonimmediate_operand"))))
    (set (match_operand:SI 1 "register_operand")
-       (mod:SI (match_dup 2) (match_dup 3)))
+       (<paired_mod>:SI (match_dup 2) (match_dup 3)))  ; SImode remainder in operand 1
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_8BIT_IDIV
+  "TARGET_64BIT
+   && TARGET_USE_8BIT_IDIV
    && TARGET_QIMODE_MATH
    && can_create_pseudo_p ()
    && !optimize_insn_for_size_p ()"
   [(const_int 0)]
-  "ix86_split_idivmod (SImode, operands, true); DONE;")
+  "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")  ; all RTL is emitted by ix86_split_idivmod. NOTE(review): <u_bool> is defined outside this view; the replaced line passed true for div -- confirm the polarity the helper now expects
 
 (define_split
   [(set (match_operand:DI 1 "register_operand")
        (zero_extend:DI
-         (mod:SI (match_operand:SI 2 "register_operand")
-                 (match_operand:SI 3 "nonimmediate_operand"))))
+         (<paired_mod>:SI (match_operand:SI 2 "register_operand")  ; SImode remainder, zero-extended into DImode operand 1
+                          (match_operand:SI 3 "nonimmediate_operand"))))
    (set (match_operand:SI 0 "register_operand")
-       (div:SI  (match_dup 2) (match_dup 3)))
+       (any_div:SI  (match_dup 2) (match_dup 3)))  ; SImode quotient, signed or unsigned, in operand 0
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_8BIT_IDIV
+  "TARGET_64BIT
+   && TARGET_USE_8BIT_IDIV
    && TARGET_QIMODE_MATH
    && can_create_pseudo_p ()
    && !optimize_insn_for_size_p ()"
   [(const_int 0)]
-  "ix86_split_idivmod (SImode, operands, true); DONE;")
+  "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")  ; all RTL is emitted by ix86_split_idivmod. NOTE(review): <u_bool> is defined outside this view; the replaced line passed true for div -- confirm the polarity the helper now expects
 
 (define_insn_and_split "divmod<mode>4_1"
   [(set (match_operand:SWI48 0 "register_operand" "=a")
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "udivmod<mode>4_1"  ; unsigned quotient + remainder, carrying the UNSPEC_DIV_ALREADY_SPLIT marker
+  [(set (match_operand:SWI48 0 "register_operand" "=a")  ; operand 0: quotient, constraint =a
+       (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")  ; operand 2: dividend, tied to operand 0
+                   (match_operand:SWI48 3 "nonimmediate_operand" "rm")))  ; operand 3: divisor, register or memory
+   (set (match_operand:SWI48 1 "register_operand" "=&d")  ; operand 1: remainder, earlyclobber =&d
+       (umod:SWI48 (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)  ; extra element absent from the define_splits that call ix86_split_idivmod; presumably keeps them from matching this form again -- confirm
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"  ; no direct assembler output; the insn has to be split
+  "reload_completed"
+  [(set (match_dup 1) (const_int 0))  ; first clear operand 1
+   (parallel [(set (match_dup 0)
+                  (udiv:SWI48 (match_dup 2) (match_dup 3)))
+             (set (match_dup 1)
+                  (umod:SWI48 (match_dup 2) (match_dup 3)))
+             (use (match_dup 1))  ; the divide form also reads the just-cleared operand 1
+             (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "divmodsi4_zext_1"
   [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (match_dup 1)
                   (ashiftrt:SI (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
   [(set_attr "type" "multi")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "udivmodsi4_zext_1"  ; unsigned SImode div+mod whose quotient is zero-extended to DImode; carries the UNSPEC_DIV_ALREADY_SPLIT marker
+  [(set (match_operand:DI 0 "register_operand" "=a")  ; operand 0: DImode destination of the zero-extended quotient, constraint =a
+       (zero_extend:DI
+         (udiv:SI (match_operand:SI 2 "register_operand" "0")  ; operand 2: dividend, tied to operand 0
+                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))  ; operand 3: divisor, register or memory
+   (set (match_operand:SI 1 "register_operand" "=&d")  ; operand 1: SImode remainder, earlyclobber =&d
+       (umod:SI (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)  ; extra element absent from the define_splits that call ix86_split_idivmod; presumably keeps them from matching this form again -- confirm
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"  ; no direct assembler output; the insn has to be split
+  "&& reload_completed"  ; leading && appends this to the insn condition above
+  [(set (match_dup 1) (const_int 0))  ; first clear operand 1
+   (parallel [(set (match_dup 0)
+                  (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 1)
+                  (umod:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 1))  ; the divide form also reads the just-cleared operand 1
+             (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
 (define_insn_and_split "divmodsi4_zext_2"
   [(set (match_operand:DI 1 "register_operand" "=&d")
        (zero_extend:DI
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (match_dup 6)
                   (ashiftrt:SI (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
   [(set_attr "type" "multi")
    (set_attr "mode" "SI")])
 
-(define_insn_and_split "*divmod<mode>4"
-  [(set (match_operand:SWIM248 0 "register_operand" "=a")
+(define_insn_and_split "udivmodsi4_zext_2"  ; unsigned SImode div+mod whose remainder is zero-extended to DImode; carries the UNSPEC_DIV_ALREADY_SPLIT marker
+  [(set (match_operand:DI 1 "register_operand" "=&d")  ; operand 1: DImode destination of the zero-extended remainder, earlyclobber =&d
+       (zero_extend:DI
+         (umod:SI (match_operand:SI 2 "register_operand" "0")  ; operand 2: dividend, tied to operand 0
+                (match_operand:SI 3 "nonimmediate_operand" "rm"))))  ; operand 3: divisor, register or memory
+   (set (match_operand:SI 0 "register_operand" "=a")  ; operand 0: SImode quotient, constraint =a
+       (udiv:SI (match_dup 2) (match_dup 3)))
+   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)  ; extra element absent from the define_splits that call ix86_split_idivmod; presumably keeps them from matching this form again -- confirm
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"  ; no direct assembler output; the insn has to be split
+  "&& reload_completed"  ; leading && appends this to the insn condition above
+  [(set (match_dup 4) (const_int 0))  ; first clear operand 4 (SImode view of operand 1, set up below)
+   (parallel [(set (match_dup 1)
+                  (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 0)
+                  (udiv:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 4))  ; the divide form also reads the just-cleared operand 4
+             (clobber (reg:CC FLAGS_REG))])]
+  "operands[4] = gen_lowpart (SImode, operands[1]);"  ; operand 4 = SImode low part of DImode operand 1
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*divmod<mode>4"
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")
        (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
                    (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
    (set (match_operand:SWIM248 1 "register_operand" "=&d")
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*divmodsi4_zext_1"
-  [(set (match_operand:DI 0 "register_operand" "=a")
-       (zero_extend:DI
-         (div:SI (match_operand:SI 2 "register_operand" "0")
-                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
-   (set (match_operand:SI 1 "register_operand" "=&d")
-       (mod:SI (match_dup 2) (match_dup 3)))
+(define_insn_and_split "*udivmod<mode>4"  ; unsigned quotient + remainder for every SWIM248 mode; split after reload into clearing operand 1 plus the divide
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")  ; operand 0: quotient, constraint =a
+       (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")  ; operand 2: dividend, tied to operand 0
+                     (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))  ; operand 3: divisor, register or memory
+   (set (match_operand:SWIM248 1 "register_operand" "=&d")  ; operand 1: remainder, earlyclobber =&d
+       (umod:SWIM248 (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
+  ""
   "#"
   "reload_completed"
-  [(parallel [(set (match_dup 1)
-                  (ashiftrt:SI (match_dup 4) (match_dup 5)))
-             (clobber (reg:CC FLAGS_REG))])
+  [(set (match_dup 1) (const_int 0))  ; first clear operand 1
    (parallel [(set (match_dup 0)
-                  (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
+                  (udiv:SWIM248 (match_dup 2) (match_dup 3)))
              (set (match_dup 1)
-                  (mod:SI (match_dup 2) (match_dup 3)))
+                  (umod:SWIM248 (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
-{
-  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
-
-  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
-    operands[4] = operands[2];
-  else
-    {
-      /* Avoid use of cltd in favor of a mov+shift.  */
-      emit_move_insn (operands[1], operands[2]);
-      operands[4] = operands[1];
-    }
-}
+  ""
   [(set_attr "type" "multi")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*divmodsi4_zext_2"
-  [(set (match_operand:DI 1 "register_operand" "=&d")
-       (zero_extend:DI
-         (mod:SI (match_operand:SI 2 "register_operand" "0")
-                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
-   (set (match_operand:SI 0 "register_operand" "=a")
-       (div:SI (match_dup 2) (match_dup 3)))
+;; Optimize division or modulo by constant power of 2, if the constant
+;; materializes only after expansion.
+(define_insn_and_split "*udivmod<mode>4_pow2"  ; unsigned div+mod by a constant; split into a logical right shift and an AND
+  [(set (match_operand:SWI48 0 "register_operand" "=r")  ; operand 0: quotient
+       (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")  ; operand 2: dividend, tied to operand 0
+                   (match_operand:SWI48 3 "const_int_operand" "n")))  ; operand 3: constant divisor
+   (set (match_operand:SWI48 1 "register_operand" "=r")  ; operand 1: remainder
+       (umod:SWI48 (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
+  "IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"  ; accept only divisors whose exact_log2 lies in 1..31 (exact_log2 presumably yields -1 for non-powers of two -- confirm)
   "#"
-  "reload_completed"
-  [(parallel [(set (match_dup 6)
-                  (ashiftrt:SI (match_dup 4) (match_dup 5)))
+  "&& reload_completed"  ; leading && appends this to the insn condition above
+  [(set (match_dup 1) (match_dup 2))  ; copy the dividend first: operand 2 shares operand 0, which the shift below overwrites
+   (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))  ; quotient = dividend >> operand 4
              (clobber (reg:CC FLAGS_REG))])
-   (parallel [(set (match_dup 1)
-                  (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
-             (set (match_dup 0)
-                  (div:SI (match_dup 2) (match_dup 3)))
-             (use (match_dup 6))
+   (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))  ; remainder = copied dividend & operand 5
              (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
-  operands[6] = gen_lowpart (SImode, operands[1]);
-
-  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
-    operands[4] = operands[2];
-  else
-    {
-      /* Avoid use of cltd in favor of a mov+shift.  */
-      emit_move_insn (operands[6], operands[2]);
-      operands[4] = operands[6];
-    }
+  int v = exact_log2 (UINTVAL (operands[3]));  /* v = log2 of the divisor; 1..31 per the insn condition.  */
+  operands[4] = GEN_INT (v);  /* Shift count used for the quotient.  */
+  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);  /* Mask of the low v bits, i.e. divisor - 1, used for the remainder.  */
 }
   [(set_attr "type" "multi")
-   (set_attr "mode" "SI")])
-
-(define_insn "*divmod<mode>4_noext"
-  [(set (match_operand:SWIM248 0 "register_operand" "=a")
-       (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
-                   (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
-   (set (match_operand:SWIM248 1 "register_operand" "=d")
-       (mod:SWIM248 (match_dup 2) (match_dup 3)))
-   (use (match_operand:SWIM248 4 "register_operand" "1"))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "idiv{<imodesuffix>}\t%3"
-  [(set_attr "type" "idiv")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*divmodsi4_noext_zext_1"
+(define_insn_and_split "*divmodsi4_zext_1"
   [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
          (div:SI (match_operand:SI 2 "register_operand" "0")
                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
-   (set (match_operand:SI 1 "register_operand" "=d")
-       (mod:SI (match_dup 2) (match_dup 3)))
-   (use (match_operand:SI 4 "register_operand" "1"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "idiv{l}\t%3"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "SI")])
-
-(define_insn "*divmodsi4_noext_zext_2"
-  [(set (match_operand:DI 1 "register_operand" "=d")
-       (zero_extend:DI
-         (mod:SI (match_operand:SI 2 "register_operand" "0")
-                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
-   (set (match_operand:SI 0 "register_operand" "=a")
-       (div:SI (match_dup 2) (match_dup 3)))
-   (use (match_operand:SI 4 "register_operand" "1"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "idiv{l}\t%3"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "SI")])
-
-(define_expand "divmodqi4"
-  [(parallel [(set (match_operand:QI 0 "register_operand")
-                  (div:QI
-                    (match_operand:QI 1 "register_operand")
-                    (match_operand:QI 2 "nonimmediate_operand")))
-             (set (match_operand:QI 3 "register_operand")
-                  (mod:QI (match_dup 1) (match_dup 2)))
-             (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_QIMODE_MATH"
-{
-  rtx div, mod;
-  rtx tmp0, tmp1;
-  
-  tmp0 = gen_reg_rtx (HImode);
-  tmp1 = gen_reg_rtx (HImode);
-
-  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is in AX.  */
-  emit_insn (gen_extendqihi2 (tmp1, operands[1]));
-  emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
-
-  /* Extract remainder from AH.  */
-  tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8));
-  tmp1 = lowpart_subreg (QImode, tmp1, SImode);
-  rtx_insn *insn = emit_move_insn (operands[3], tmp1);
-
-  mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
-  set_unique_reg_note (insn, REG_EQUAL, mod);
-
-  /* Extract quotient from AL.  */
-  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
-
-  div = gen_rtx_DIV (QImode, operands[1], operands[2]);
-  set_unique_reg_note (insn, REG_EQUAL, div);
-
-  DONE;
-})
-
-;; Divide AX by r/m8, with result stored in
-;; AL <- Quotient
-;; AH <- Remainder
-;; Change div/mod to HImode and extend the second argument to HImode
-;; so that mode of div/mod matches with mode of arguments.  Otherwise
-;; combine may fail.
-(define_insn "divmodhiqi3"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-       (ior:HI
-         (ashift:HI
-           (zero_extend:HI
-             (truncate:QI
-               (mod:HI (match_operand:HI 1 "register_operand" "0")
-                       (sign_extend:HI
-                         (match_operand:QI 2 "nonimmediate_operand" "qm")))))
-           (const_int 8))
-         (zero_extend:HI
-           (truncate:QI
-             (div:HI (match_dup 1) (sign_extend:HI (match_dup 2)))))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "idiv{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
-(define_expand "udivmod<mode>4"
-  [(parallel [(set (match_operand:SWIM248 0 "register_operand")
-                  (udiv:SWIM248
-                    (match_operand:SWIM248 1 "register_operand")
-                    (match_operand:SWIM248 2 "nonimmediate_operand")))
-             (set (match_operand:SWIM248 3 "register_operand")
-                  (umod:SWIM248 (match_dup 1) (match_dup 2)))
-             (clobber (reg:CC FLAGS_REG))])])
-
-;; Split with 8bit unsigned divide:
-;;     if (dividend an divisor are in [0-255])
-;;        use 8bit unsigned integer divide
-;;      else
-;;        use original integer divide
-(define_split
-  [(set (match_operand:SWI48 0 "register_operand")
-       (udiv:SWI48 (match_operand:SWI48 2 "register_operand")
-                   (match_operand:SWI48 3 "nonimmediate_operand")))
-   (set (match_operand:SWI48 1 "register_operand")
-       (umod:SWI48 (match_dup 2) (match_dup 3)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_8BIT_IDIV
-   && TARGET_QIMODE_MATH
-   && can_create_pseudo_p ()
-   && !optimize_insn_for_size_p ()"
-  [(const_int 0)]
-  "ix86_split_idivmod (<MODE>mode, operands, false); DONE;")
-
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-       (zero_extend:DI
-         (udiv:SI (match_operand:SI 2 "register_operand")
-                  (match_operand:SI 3 "nonimmediate_operand"))))
-   (set (match_operand:SI 1 "register_operand")
-       (umod:SI (match_dup 2) (match_dup 3)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && TARGET_USE_8BIT_IDIV
-   && TARGET_QIMODE_MATH
-   && can_create_pseudo_p ()
-   && !optimize_insn_for_size_p ()"
-  [(const_int 0)]
-  "ix86_split_idivmod (SImode, operands, false); DONE;")
-
-(define_split
-  [(set (match_operand:DI 1 "register_operand")
-       (zero_extend:DI
-         (umod:SI (match_operand:SI 2 "register_operand")
-                  (match_operand:SI 3 "nonimmediate_operand"))))
-   (set (match_operand:SI 0 "register_operand")
-       (udiv:SI (match_dup 2) (match_dup 3)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && TARGET_USE_8BIT_IDIV
-   && TARGET_QIMODE_MATH
-   && can_create_pseudo_p ()
-   && !optimize_insn_for_size_p ()"
-  [(const_int 0)]
-  "ix86_split_idivmod (SImode, operands, false); DONE;")
-
-(define_insn_and_split "udivmod<mode>4_1"
-  [(set (match_operand:SWI48 0 "register_operand" "=a")
-       (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
-                   (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
-   (set (match_operand:SWI48 1 "register_operand" "=&d")
-       (umod:SWI48 (match_dup 2) (match_dup 3)))
-   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "#"
-  "reload_completed"
-  [(set (match_dup 1) (const_int 0))
-   (parallel [(set (match_dup 0)
-                  (udiv:SWI48 (match_dup 2) (match_dup 3)))
-             (set (match_dup 1)
-                  (umod:SWI48 (match_dup 2) (match_dup 3)))
-             (use (match_dup 1))
-             (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn_and_split "udivmodsi4_zext_1"
-  [(set (match_operand:DI 0 "register_operand" "=a")
-       (zero_extend:DI
-         (udiv:SI (match_operand:SI 2 "register_operand" "0")
-                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
    (set (match_operand:SI 1 "register_operand" "=&d")
-       (umod:SI (match_dup 2) (match_dup 3)))
-   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+       (mod:SI (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "#"
-  "reload_completed"
-  [(set (match_dup 1) (const_int 0))
+  "&& reload_completed"
+  [(parallel [(set (match_dup 1)
+                  (ashiftrt:SI (match_dup 4) (match_dup 5)))
+             (clobber (reg:CC FLAGS_REG))])
    (parallel [(set (match_dup 0)
-                  (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
+                  (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 1)
-                  (umod:SI (match_dup 2) (match_dup 3)))
+                  (mod:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "mode" "SI")])
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
 
-(define_insn_and_split "udivmodsi4_zext_2"
-  [(set (match_operand:DI 1 "register_operand" "=&d")
-       (zero_extend:DI
-         (umod:SI (match_operand:SI 2 "register_operand" "0")
-                (match_operand:SI 3 "nonimmediate_operand" "rm"))))
-   (set (match_operand:SI 0 "register_operand" "=a")
-       (udiv:SI (match_dup 2) (match_dup 3)))
-   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "#"
-  "reload_completed"
-  [(set (match_dup 4) (const_int 0))
-   (parallel [(set (match_dup 1)
-                  (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
-             (set (match_dup 0)
-                  (udiv:SI (match_dup 2) (match_dup 3)))
-             (use (match_dup 4))
-             (clobber (reg:CC FLAGS_REG))])]
-  "operands[4] = gen_lowpart (SImode, operands[1]);"
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[1], operands[2]);
+      operands[4] = operands[1];
+    }
+}
   [(set_attr "type" "multi")
    (set_attr "mode" "SI")])
 
-(define_insn_and_split "*udivmod<mode>4"
-  [(set (match_operand:SWIM248 0 "register_operand" "=a")
-       (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
-                     (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
-   (set (match_operand:SWIM248 1 "register_operand" "=&d")
-       (umod:SWIM248 (match_dup 2) (match_dup 3)))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "#"
-  "reload_completed"
-  [(set (match_dup 1) (const_int 0))
-   (parallel [(set (match_dup 0)
-                  (udiv:SWIM248 (match_dup 2) (match_dup 3)))
-             (set (match_dup 1)
-                  (umod:SWIM248 (match_dup 2) (match_dup 3)))
-             (use (match_dup 1))
-             (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "mode" "<MODE>")])
-
 (define_insn_and_split "*udivmodsi4_zext_1"
   [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(set (match_dup 1) (const_int 0))
    (parallel [(set (match_dup 0)
                   (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
-             (set (match_dup 1)
-                  (umod:SI (match_dup 2) (match_dup 3)))
-             (use (match_dup 1))
-             (clobber (reg:CC FLAGS_REG))])]
-  ""
-  [(set_attr "type" "multi")
-   (set_attr "mode" "SI")])
-
-(define_insn_and_split "*udivmodsi4_zext_2"
-  [(set (match_operand:DI 1 "register_operand" "=&d")
-       (zero_extend:DI
-         (umod:SI (match_operand:SI 2 "register_operand" "0")
-                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
-   (set (match_operand:SI 0 "register_operand" "=a")
-       (udiv:SI (match_dup 2) (match_dup 3)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "#"
-  "reload_completed"
-  [(set (match_dup 4) (const_int 0))
-   (parallel [(set (match_dup 1)
-                  (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
-             (set (match_dup 0)
-                  (udiv:SI (match_dup 2) (match_dup 3)))
-             (use (match_dup 4))
-             (clobber (reg:CC FLAGS_REG))])]
-  "operands[4] = gen_lowpart (SImode, operands[1]);"
-  [(set_attr "type" "multi")
-   (set_attr "mode" "SI")])
-
-;; Optimize division or modulo by constant power of 2, if the constant
-;; materializes only after expansion.
-(define_insn_and_split "*udivmod<mode>4_pow2"
-  [(set (match_operand:SWI48 0 "register_operand" "=r")
-       (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
-                   (match_operand:SWI48 3 "const_int_operand" "n")))
-   (set (match_operand:SWI48 1 "register_operand" "=r")
-       (umod:SWI48 (match_dup 2) (match_dup 3)))
-   (clobber (reg:CC FLAGS_REG))]
-  "IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000))
-   && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0"
-  "#"
-  "&& 1"
-  [(set (match_dup 1) (match_dup 2))
-   (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))
-             (clobber (reg:CC FLAGS_REG))])
-   (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))
+             (set (match_dup 1)
+                  (umod:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
-{
-  int v = exact_log2 (UINTVAL (operands[3]));
-  operands[4] = GEN_INT (v);
-  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
-}
+  ""
   [(set_attr "type" "multi")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "SI")])
 
 (define_insn_and_split "*udivmodsi4_pow2_zext_1"
   [(set (match_operand:DI 0 "register_operand" "=r")
        (umod:SI (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000))
-   && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0"
+   && exact_log2 (UINTVAL (operands[3])) > 0"
   "#"
-  "&& 1"
+  "&& reload_completed"
   [(set (match_dup 1) (match_dup 2))
    (parallel [(set (match_dup 0)
                   (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
   [(set_attr "type" "multi")
    (set_attr "mode" "SI")])
 
+;; Signed SImode divmod for 64-bit targets where the remainder (operand 1)
+;; is zero-extended to DImode and the quotient (operand 0) stays in SImode.
+;; The insn is output as "#" and split after reload into
+;;   1) an arithmetic right shift by GET_MODE_BITSIZE (SImode) - 1 bits,
+;;      written to operand 6 (the SImode low part of operand 1), and
+;;   2) the same div/mod parallel with an added (use (match_dup 6)).
+(define_insn_and_split "*divmodsi4_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=&d")
+       (zero_extend:DI
+         (mod:SI (match_operand:SI 2 "register_operand" "0")
+                 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 0 "register_operand" "=a")
+       (div:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 6)
+                  (ashiftrt:SI (match_dup 4) (match_dup 5)))
+             (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 1)
+                  (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 0)
+                  (div:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 6))
+             (clobber (reg:CC FLAGS_REG))])]
+{
+  /* operands[5] is the shift count; operands[6] is the SImode low part
+     of the DImode remainder register.  */
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
+  operands[6] = gen_lowpart (SImode, operands[1]);
+
+  /* operands[4] is the shift source: the dividend itself when optimizing
+     for size or when TARGET_USE_CLTD is set, otherwise a copy of the
+     dividend that is first moved into operands[6].  */
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[6], operands[2]);
+      operands[4] = operands[6];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
+;; Unsigned SImode divmod for 64-bit targets where the umod:SI remainder
+;; (operand 1) is zero-extended to DImode and the udiv:SI quotient is
+;; operand 0.  The insn is output as "#" and split after reload into a
+;; store of zero to operand 4 (the SImode low part of operand 1) followed
+;; by the same div/mod parallel with an added (use (match_dup 4)).
+(define_insn_and_split "*udivmodsi4_zext_2"
+  [(set (match_operand:DI 1 "register_operand" "=&d")
+       (zero_extend:DI
+         (umod:SI (match_operand:SI 2 "register_operand" "0")
+                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+   (set (match_operand:SI 0 "register_operand" "=a")
+       (udiv:SI (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4) (const_int 0))
+   (parallel [(set (match_dup 1)
+                  (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
+             (set (match_dup 0)
+                  (udiv:SI (match_dup 2) (match_dup 3)))
+             (use (match_dup 4))
+             (clobber (reg:CC FLAGS_REG))])]
+  "operands[4] = gen_lowpart (SImode, operands[1]);"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "SI")])
+
 (define_insn_and_split "*udivmodsi4_pow2_zext_2"
   [(set (match_operand:DI 1 "register_operand" "=r")
        (zero_extend:DI
        (umod:SI (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && IN_RANGE (INTVAL (operands[3]), 2, HOST_WIDE_INT_UC (0x80000000))
-   && (UINTVAL (operands[3]) & (UINTVAL (operands[3]) - 1)) == 0"
+   && exact_log2 (UINTVAL (operands[3])) > 0"
   "#"
-  "&& 1"
+  "&& reload_completed"
   [(set (match_dup 1) (match_dup 2))
    (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
              (clobber (reg:CC FLAGS_REG))])
   [(set_attr "type" "multi")
    (set_attr "mode" "SI")])
 
-(define_insn "*udivmod<mode>4_noext"
+(define_insn "*<u>divmod<mode>4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
-       (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
-                     (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+       (any_div:SWIM248
+         (match_operand:SWIM248 2 "register_operand" "0")
+         (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
    (set (match_operand:SWIM248 1 "register_operand" "=d")
-       (umod:SWIM248 (match_dup 2) (match_dup 3)))
+       (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
    (use (match_operand:SWIM248 4 "register_operand" "1"))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "div{<imodesuffix>}\t%3"
+  "<sgnprefix>div{<imodesuffix>}\t%3"
   [(set_attr "type" "idiv")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*udivmodsi4_noext_zext_1"
+(define_insn "*<u>divmodsi4_noext_zext_1"
   [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
-         (udiv:SI (match_operand:SI 2 "register_operand" "0")
-                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+         (any_div:SI (match_operand:SI 2 "register_operand" "0")
+                     (match_operand:SI 3 "nonimmediate_operand" "rm"))))
    (set (match_operand:SI 1 "register_operand" "=d")
-       (umod:SI (match_dup 2) (match_dup 3)))
+       (<paired_mod>:SI (match_dup 2) (match_dup 3)))
    (use (match_operand:SI 4 "register_operand" "1"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "div{l}\t%3"
+  "<sgnprefix>div{l}\t%3"
   [(set_attr "type" "idiv")
    (set_attr "mode" "SI")])
 
-(define_insn "*udivmodsi4_noext_zext_2"
+(define_insn "*<u>divmodsi4_noext_zext_2"
   [(set (match_operand:DI 1 "register_operand" "=d")
        (zero_extend:DI
-         (umod:SI (match_operand:SI 2 "register_operand" "0")
-                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
+         (<paired_mod>:SI (match_operand:SI 2 "register_operand" "0")
+                          (match_operand:SI 3 "nonimmediate_operand" "rm"))))
    (set (match_operand:SI 0 "register_operand" "=a")
-       (udiv:SI (match_dup 2) (match_dup 3)))
+       (any_div:SI (match_dup 2) (match_dup 3)))
    (use (match_operand:SI 4 "register_operand" "1"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "div{l}\t%3"
+  "<sgnprefix>div{l}\t%3"
   [(set_attr "type" "idiv")
    (set_attr "mode" "SI")])
 
+;; Expander for signed QImode divmod, enabled by TARGET_QIMODE_MATH.
+;; Operand 0 receives the quotient and operand 3 the remainder of
+;; operand 1 divided by operand 2.  The dividend is widened to HImode
+;; with gen_extendqihi2 and divided with gen_divmodhiqi3; both QImode
+;; results are then moved out of the HImode temporary, and each move
+;; gets a REG_EQUAL note giving the equivalent QImode mod or div.
+(define_expand "divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand")
+                  (div:QI
+                    (match_operand:QI 1 "register_operand")
+                    (match_operand:QI 2 "nonimmediate_operand")))
+             (set (match_operand:QI 3 "register_operand")
+                  (mod:QI (match_dup 1) (match_dup 2)))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, mod;
+  rtx tmp0, tmp1;
+
+  /* tmp0 holds the combined HImode result, tmp1 the widened dividend.  */
+  tmp0 = gen_reg_rtx (HImode);
+  tmp1 = gen_reg_rtx (HImode);
+
+  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is in AX.  */
+  emit_insn (gen_extendqihi2 (tmp1, operands[1]));
+  emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
+
+  /* Extract remainder from AH.  The zero_extract selects 8 bits starting
+     at bit 8 of tmp0; tmp1 is reused to hold that expression.  */
+  tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8));
+  tmp1 = lowpart_subreg (QImode, tmp1, SImode);
+  rtx_insn *insn = emit_move_insn (operands[3], tmp1);
+
+  mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
+  set_unique_reg_note (insn, REG_EQUAL, mod);
+
+  /* Extract quotient from AL.  */
+  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
+
+  div = gen_rtx_DIV (QImode, operands[1], operands[2]);
+  set_unique_reg_note (insn, REG_EQUAL, div);
+
+  /* Everything was emitted by hand; suppress the default pattern.  */
+  DONE;
+})
+
 (define_expand "udivmodqi4"
   [(parallel [(set (match_operand:QI 0 "register_operand")
                   (udiv:QI
 {
   rtx div, mod;
   rtx tmp0, tmp1;
-  
+
   tmp0 = gen_reg_rtx (HImode);
   tmp1 = gen_reg_rtx (HImode);
 
   DONE;
 })
 
-(define_insn "udivmodhiqi3"
+;; Divide AX by r/m8, with the result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+;; Change div/mod to HImode and extend the second argument to HImode
+;; so that the mode of div/mod matches the mode of its arguments.
+;; Otherwise combine may fail.
+(define_insn "<u>divmodhiqi3"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (ior:HI
          (ashift:HI
            (zero_extend:HI
              (truncate:QI
                (mod:HI (match_operand:HI 1 "register_operand" "0")
-                       (zero_extend:HI
+                       (any_extend:HI
                          (match_operand:QI 2 "nonimmediate_operand" "qm")))))
            (const_int 8))
          (zero_extend:HI
            (truncate:QI
-             (div:HI (match_dup 1) (zero_extend:HI (match_dup 2)))))))
+             (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_QIMODE_MATH"
-  "div{b}\t%2"
+  "<sgnprefix>div{b}\t%2"
   [(set_attr "type" "idiv")
    (set_attr "mode" "QI")])
 
 ;; it should be done with splitters.
 
 (define_expand "and<mode>3"
-  [(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
-       (and:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")
-                     (match_operand:SWIM1248x 2 "<general_szext_operand>")))]
+  [(set (match_operand:SWIM1248s 0 "nonimmediate_operand")
+       (and:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand")
+                      (match_operand:SWIM1248s 2 "<general_szext_operand>")))]
   ""
 {
   machine_mode mode = <MODE>mode;
-  rtx (*insn) (rtx, rtx);
 
-  if (CONST_INT_P (operands[2]) && REG_P (operands[0]))
+  if (<MODE>mode == DImode && !TARGET_64BIT)
+    ;
+  else if (const_int_operand (operands[2], <MODE>mode)
+          && register_operand (operands[0], <MODE>mode)
+          && !(TARGET_ZERO_EXTEND_WITH_AND
+               && optimize_function_for_speed_p (cfun)))
     {
-      HOST_WIDE_INT ival = INTVAL (operands[2]);
+      unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
 
-      if (ival == (HOST_WIDE_INT) 0xffffffff)
+      if (ival == GET_MODE_MASK (SImode))
        mode = SImode;
-      else if (ival == 0xffff)
+      else if (ival == GET_MODE_MASK (HImode))
        mode = HImode;
-      else if (ival == 0xff)
+      else if (ival == GET_MODE_MASK (QImode))
        mode = QImode;
-      }
-
-  if (mode == <MODE>mode)
-    {
-      ix86_expand_binary_operator (AND, <MODE>mode, operands);
-      DONE;
     }
 
-  if (<MODE>mode == DImode)
-    insn = (mode == SImode)
-          ? gen_zero_extendsidi2
-          : (mode == HImode)
-          ? gen_zero_extendhidi2
-          : gen_zero_extendqidi2;
-  else if (<MODE>mode == SImode)
-    insn = (mode == HImode)
-          ? gen_zero_extendhisi2
-          : gen_zero_extendqisi2;
-  else if (<MODE>mode == HImode)
-    insn = gen_zero_extendqihi2;
+  if (mode != <MODE>mode)
+    emit_insn (gen_extend_insn
+              (operands[0], gen_lowpart (mode, operands[1]),
+               <MODE>mode, mode, 1));
   else
-    gcc_unreachable ();
+    ix86_expand_binary_operator (AND, <MODE>mode, operands);
 
-  emit_insn (insn (operands[0], gen_lowpart (mode, operands[1])));
   DONE;
 })
 
 (define_insn_and_split "*anddi3_doubleword"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+  [(set (match_operand:DI 0 "nonimmediate_operand")
        (and:DI
-        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
-        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm")))
+        (match_operand:DI 1 "nonimmediate_operand")
+        (match_operand:DI 2 "x86_64_szext_general_operand")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
-   && ix86_binary_operator_ok (AND, DImode, operands)"
+   && ix86_binary_operator_ok (AND, DImode, operands)
+   && can_create_pseudo_p ()"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(const_int 0)]
 {
   split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);
+
   if (operands[2] == const0_rtx)
-    {
-      operands[1] = const0_rtx;
-      ix86_expand_move (SImode, &operands[0]);
-    }
-  else if (operands[2] != constm1_rtx)
-    ix86_expand_binary_operator (AND, SImode, &operands[0]);
-  else if (operands[5] == constm1_rtx)
-    emit_note (NOTE_INSN_DELETED);
+    emit_move_insn (operands[0], const0_rtx);
+  else if (operands[2] == constm1_rtx)
+    emit_move_insn (operands[0], operands[1]);
+  else
+    emit_insn (gen_andsi3 (operands[0], operands[1], operands[2]));
+
   if (operands[5] == const0_rtx)
-    {
-      operands[4] = const0_rtx;
-      ix86_expand_move (SImode, &operands[3]);
-    }
-  else if (operands[5] != constm1_rtx)
-    ix86_expand_binary_operator (AND, SImode, &operands[3]);
+    emit_move_insn (operands[3], const0_rtx);
+  else if (operands[5] == constm1_rtx)
+    emit_move_insn (operands[3], operands[4]);
+  else
+    emit_insn (gen_andsi3 (operands[3], operands[4], operands[5]));
+
   DONE;
 })
 
   [(parallel [(set (match_dup 0)
                   (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
              (clobber (reg:CC FLAGS_REG))])]
-  "operands[2] = gen_lowpart (SImode, operands[2]);")
+{
+  if (GET_CODE (operands[2]) == SYMBOL_REF
+      || GET_CODE (operands[2]) == LABEL_REF)
+    {
+      operands[2] = shallow_copy_rtx (operands[2]);
+      PUT_MODE (operands[2], SImode);
+    }
+  else if (GET_CODE (operands[2]) == CONST)
+    {
+      /* (const:DI (plus:DI (symbol_ref:DI ("...")) (const_int N))) */
+      operands[2] = copy_rtx (operands[2]);
+      PUT_MODE (operands[2], SImode);
+      PUT_MODE (XEXP (operands[2], 0), SImode);
+      PUT_MODE (XEXP (XEXP (operands[2], 0), 0), SImode);
+    }    
+  else
+    operands[2] = gen_lowpart (SImode, operands[2]);
+})
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*andsi_1_zext"
        || REGNO (operands[0]) != REGNO (operands[1]))"
   [(const_int 0)]
 {
-  HOST_WIDE_INT ival = INTVAL (operands[2]);
+  unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
   machine_mode mode;
-  rtx (*insn) (rtx, rtx);
 
-  if (ival == (HOST_WIDE_INT) 0xffffffff)
+  if (ival == GET_MODE_MASK (SImode))
     mode = SImode;
-  else if (ival == 0xffff)
+  else if (ival == GET_MODE_MASK (HImode))
     mode = HImode;
+  else if (ival == GET_MODE_MASK (QImode))
+    mode = QImode;
   else
-    {
-      gcc_assert (ival == 0xff);
-      mode = QImode;
-    }
+    gcc_unreachable ();
 
-  if (<MODE>mode == DImode)
-    insn = (mode == SImode)
-          ? gen_zero_extendsidi2
-          : (mode == HImode)
-          ? gen_zero_extendhidi2
-          : gen_zero_extendqidi2;
-  else
-    {
-      if (<MODE>mode != SImode)
-       /* Zero extend to SImode to avoid partial register stalls.  */
-       operands[0] = gen_lowpart (SImode, operands[0]);
+  /* Zero extend to SImode to avoid partial register stalls.  */
+  if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
+    operands[0] = gen_lowpart (SImode, operands[0]);
 
-      insn = (mode == HImode)
-            ? gen_zero_extendhisi2
-            : gen_zero_extendqisi2;
-    }
-  emit_insn (insn (operands[0], gen_lowpart (mode, operands[1])));
+  emit_insn (gen_extend_insn
+            (operands[0], gen_lowpart (mode, operands[1]),
+             GET_MODE (operands[0]), mode, 1));
   DONE;
 })
 
 })
 
 (define_insn "*andndi3_doubleword"
-  [(set (match_operand:DI 0 "register_operand" "=&r,r,r,&r")
+  [(set (match_operand:DI 0 "register_operand")
        (and:DI
-         (not:DI (match_operand:DI 1 "register_operand" "r,0,r,0"))
-         (match_operand:DI 2 "nonimmediate_operand" "rm,rm,0,rm")))
+         (not:DI (match_operand:DI 1 "register_operand"))
+         (match_operand:DI 2 "nonimmediate_operand")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2"
-  "#"
-  [(set_attr "isa" "bmi,bmi,bmi,*")])
+  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
+   && can_create_pseudo_p ()"
+  "#")
 
 (define_split
   [(set (match_operand:DI 0 "register_operand")
          (match_operand:DI 2 "nonimmediate_operand")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_BMI && TARGET_STV && TARGET_SSE2
-   && reload_completed"
+   && can_create_pseudo_p ()"
   [(parallel [(set (match_dup 0)
                   (and:SI (not:SI (match_dup 1)) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])
 (define_split
   [(set (match_operand:DI 0 "register_operand")
        (and:DI
-         (not:DI (match_dup 0))
-         (match_operand:DI 1 "nonimmediate_operand")))
+         (not:DI (match_operand:DI 1 "register_operand"))
+         (match_operand:DI 2 "nonimmediate_operand")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && !TARGET_BMI && TARGET_STV && TARGET_SSE2
-   && reload_completed"
-  [(set (match_dup 0) (not:SI (match_dup 0)))
+   && can_create_pseudo_p ()"
+  [(set (match_dup 6) (not:SI (match_dup 1)))
    (parallel [(set (match_dup 0)
-                  (and:SI (match_dup 0) (match_dup 1)))
+                  (and:SI (match_dup 6) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])
-   (set (match_dup 2) (not:SI (match_dup 2)))
-   (parallel [(set (match_dup 2)
-                  (and:SI (match_dup 2) (match_dup 3)))
+   (set (match_dup 7) (not:SI (match_dup 4)))
+   (parallel [(set (match_dup 3)
+                  (and:SI (match_dup 7) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])]
-  "split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);")
+{
+  operands[6] = gen_reg_rtx (SImode);
+  operands[7] = gen_reg_rtx (SImode);
+
+  split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);
+})
 
 (define_insn "*andn<mode>_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r,r")
 ;; If this is considered useful, it should be done with splitters.
 
 (define_expand "<code><mode>3"
-  [(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
-       (any_or:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")
-                            (match_operand:SWIM1248x 2 "<general_operand>")))]
+  [(set (match_operand:SWIM1248s 0 "nonimmediate_operand")
+       (any_or:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand")
+                         (match_operand:SWIM1248s 2 "<general_operand>")))]
   ""
   "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
 
 (define_insn_and_split "*<code>di3_doubleword"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+  [(set (match_operand:DI 0 "nonimmediate_operand")
        (any_or:DI
-        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
-        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm")))
+        (match_operand:DI 1 "nonimmediate_operand")
+        (match_operand:DI 2 "x86_64_szext_general_operand")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
-   && ix86_binary_operator_ok (<CODE>, DImode, operands)"
+   && ix86_binary_operator_ok (<CODE>, DImode, operands)
+   && can_create_pseudo_p ()"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(const_int 0)]
 {
   split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);
-  if (operands[2] == constm1_rtx)
+
+  if (operands[2] == const0_rtx)
+    emit_move_insn (operands[0], operands[1]);
+  else if (operands[2] == constm1_rtx)
     {
       if (<CODE> == IOR)
-       {
-         operands[1] = constm1_rtx;
-         ix86_expand_move (SImode, &operands[0]);
-       }
+       emit_move_insn (operands[0], constm1_rtx);
       else
        ix86_expand_unary_operator (NOT, SImode, &operands[0]);
     }
-  else if (operands[2] != const0_rtx)
+  else
     ix86_expand_binary_operator (<CODE>, SImode, &operands[0]);
-  else if (operands[5] == const0_rtx)
-    emit_note (NOTE_INSN_DELETED);
-  if (operands[5] == constm1_rtx)
+
+  if (operands[5] == const0_rtx)
+    emit_move_insn (operands[3], operands[4]);
+  else if (operands[5] == constm1_rtx)
     {
       if (<CODE> == IOR)
-       {
-         operands[4] = constm1_rtx;
-         ix86_expand_move (SImode, &operands[3]);
-       }
+       emit_move_insn (operands[3], constm1_rtx);
       else
        ix86_expand_unary_operator (NOT, SImode, &operands[3]);
     }
-  else if (operands[5] != const0_rtx)
+  else
     ix86_expand_binary_operator (<CODE>, SImode, &operands[3]);
+
   DONE;
 })
 
   [(set_attr "type" "negnot")
    (set_attr "mode" "<MODE>")])
 
-;; Changing of sign for FP values is doable using integer unit too.
+;; Expander for TFmode operations of the absneg code iterator, available
+;; when TARGET_SSE is set.  All work is delegated to
+;; ix86_expand_fp_absneg_operator, after which DONE suppresses the
+;; default pattern.
+(define_expand "<code>tf2"
+  [(set (match_operand:TF 0 "register_operand")
+       (absneg:TF (match_operand:TF 1 "register_operand")))]
+  "TARGET_SSE"
+  "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
+
+;; TFmode absneg carrying an extra (use ...) of a TFmode vector operand
+;; (operand 2) and a flags clobber.  It is always output as "#", so it
+;; must be split before final output.  The "isa" attribute marks
+;; alternatives 0-1 as noavx and alternatives 2-3 as avx.
+;; NOTE(review): operand 2 is presumably the sign-bit mask supplied by
+;; ix86_expand_fp_absneg_operator -- confirm against that function.
+(define_insn "*<code>tf2_1"
+  [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
+       (absneg:TF
+         (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m")))
+   (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_SSE"
+  "#"
+  [(set_attr "isa" "noavx,noavx,avx,avx")])
+
+;; TFmode negated absolute value, (neg (abs x)), carrying an extra
+;; (use ...) of a TFmode vector operand (operand 2).  Unlike the plain
+;; absneg pattern there is no flags clobber.  It is always output as
+;; "#", so it must be split before final output.  The "isa" attribute
+;; marks alternatives 0-1 as noavx and alternatives 2-3 as avx.
+(define_insn "*nabstf2_1"
+  [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
+       (neg:TF
+         (abs:TF
+           (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m"))))
+   (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
+  "TARGET_SSE"
+  "#"
+  [(set_attr "isa" "noavx,noavx,avx,avx")])
 
 (define_expand "<code><mode>2"
   [(set (match_operand:X87MODEF 0 "register_operand")
   "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
   "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
 
-(define_insn "*absneg<mode>2"
-  [(set (match_operand:MODEF 0 "register_operand" "=Yv,Yv,f,!r")
-       (match_operator:MODEF 3 "absneg_operator"
-         [(match_operand:MODEF 1 "register_operand" "0,Yv,0,0")]))
-   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "Yvm,0,X,X"))
+;; Changing of sign for FP values is doable using integer unit too.
+;; Hence two alternatives: "f", and "r" disparaged with "!".  In both,
+;; the input is tied to the output ("0").  The pattern carries a flags
+;; clobber and is always output as "#", so it must be split after reload.
+(define_insn "*<code><mode>2_i387_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
+       (absneg:X87MODEF
+         (match_operand:X87MODEF 1 "register_operand" "0,0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_80387"
+  "#")
+
+;; After reload, when both operands satisfy fp_register_operand, replace
+;; the clobbering form by a plain absneg set without the flags clobber.
+(define_split
+  [(set (match_operand:X87MODEF 0 "fp_register_operand")
+       (absneg:X87MODEF (match_operand:X87MODEF 1 "fp_register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_80387 && reload_completed"
+  [(set (match_dup 0) (absneg:X87MODEF (match_dup 1)))])
+
+;; After reload, when both operands satisfy general_reg_operand, hand the
+;; operation to ix86_split_fp_absneg_operator.  The (const_int 0) template
+;; is only a placeholder because the preparation code ends in DONE.
+;; NOTE(review): the helper presumably emits the integer-unit replacement
+;; insns itself -- confirm against its definition.
+(define_split
+  [(set (match_operand:X87MODEF 0 "general_reg_operand")
+       (absneg:X87MODEF (match_operand:X87MODEF 1 "general_reg_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_80387 && reload_completed"
+  [(const_int 0)]
+  "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn "*<code><mode>2_1"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r")
+       (absneg:MODEF
+         (match_operand:MODEF 1 "register_operand" "0,x,Yv,0,0")))
+   (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm,X,X"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
   "#"
-  [(set (attr "enabled")
+  [(set_attr "isa" "noavx,noavx,avx,*,*")
+   (set (attr "enabled")
      (if_then_else
        (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
        (if_then_else
-        (eq_attr "alternative" "2")
+        (eq_attr "alternative" "3,4")
         (symbol_ref "TARGET_MIX_SSE_I387")
-        (symbol_ref "true"))
+        (const_string "*"))
        (if_then_else
-        (eq_attr "alternative" "2,3")
+        (eq_attr "alternative" "3,4")
         (symbol_ref "true")
         (symbol_ref "false"))))])
 
-(define_insn "*absnegxf2_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f,!r")
-       (match_operator:XF 3 "absneg_operator"
-         [(match_operand:XF 1 "register_operand" "0,0")]))
-   (use (match_operand 2))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_80387"
-  "#")
-
-(define_expand "<code>tf2"
-  [(set (match_operand:TF 0 "register_operand")
-       (absneg:TF (match_operand:TF 1 "register_operand")))]
-  "TARGET_SSE"
-  "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
-
-(define_insn "*absnegtf2_sse"
-  [(set (match_operand:TF 0 "register_operand" "=Yv,Yv")
-       (match_operator:TF 3 "absneg_operator"
-         [(match_operand:TF 1 "register_operand" "0,Yv")]))
-   (use (match_operand:TF 2 "nonimmediate_operand" "Yvm,0"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE"
-  "#")
-
-;; Splitters for fp abs and neg.
-
-(define_split
-  [(set (match_operand 0 "fp_register_operand")
-       (match_operator 1 "absneg_operator" [(match_dup 0)]))
-   (use (match_operand 2))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
-  [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
-
 (define_split
-  [(set (match_operand 0 "sse_reg_operand")
-       (match_operator 3 "absneg_operator"
-         [(match_operand 1 "register_operand")]))
-   (use (match_operand 2 "nonimmediate_operand"))
+  [(set (match_operand:SSEMODEF 0 "sse_reg_operand")
+       (absneg:SSEMODEF
+         (match_operand:SSEMODEF 1 "vector_operand")))
+   (use (match_operand:<ssevecmodef> 2 "vector_operand"))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
+  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    || (TARGET_SSE && (<MODE>mode == TFmode)))
+   && reload_completed"
   [(set (match_dup 0) (match_dup 3))]
 {
-  machine_mode mode = GET_MODE (operands[0]);
-  machine_mode vmode = GET_MODE (operands[2]);
-  rtx tmp;
+  machine_mode mode = <MODE>mode;
+  machine_mode vmode = <ssevecmodef>mode;
+  enum rtx_code absneg_op = <CODE> == ABS ? AND : XOR;
 
   operands[0] = lowpart_subreg (vmode, operands[0], mode);
   operands[1] = lowpart_subreg (vmode, operands[1], mode);
-  if (operands_match_p (operands[0], operands[2]))
-    std::swap (operands[1], operands[2]);
-  if (GET_CODE (operands[3]) == ABS)
-    tmp = gen_rtx_AND (vmode, operands[1], operands[2]);
-  else
-    tmp = gen_rtx_XOR (vmode, operands[1], operands[2]);
-  operands[3] = tmp;
-})
 
-(define_split
-  [(set (match_operand:SF 0 "general_reg_operand")
-       (match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
-   (use (match_operand:V4SF 2))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (match_dup 1))
-             (clobber (reg:CC FLAGS_REG))])]
-{
-  rtx tmp;
-  operands[0] = gen_lowpart (SImode, operands[0]);
-  if (GET_CODE (operands[1]) == ABS)
+  if (TARGET_AVX)
     {
-      tmp = gen_int_mode (0x7fffffff, SImode);
-      tmp = gen_rtx_AND (SImode, operands[0], tmp);
+      if (MEM_P (operands[1]))
+        std::swap (operands[1], operands[2]);
     }
   else
-    {
-      tmp = gen_int_mode (0x80000000, SImode);
-      tmp = gen_rtx_XOR (SImode, operands[0], tmp);
-    }
-  operands[1] = tmp;
+   {
+     if (operands_match_p (operands[0], operands[2]))
+       std::swap (operands[1], operands[2]);
+   }
+
+  operands[3]
+    = gen_rtx_fmt_ee (absneg_op, vmode, operands[1], operands[2]);
 })
 
 (define_split
-  [(set (match_operand:DF 0 "general_reg_operand")
-       (match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
+  [(set (match_operand:MODEF 0 "fp_register_operand")
+       (absneg:MODEF (match_operand:MODEF 1 "fp_register_operand")))
    (use (match_operand 2))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (match_dup 1))
-             (clobber (reg:CC FLAGS_REG))])]
-{
-  rtx tmp;
-  if (TARGET_64BIT)
-    {
-      tmp = gen_lowpart (DImode, operands[0]);
-      tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63));
-      operands[0] = tmp;
-
-      if (GET_CODE (operands[1]) == ABS)
-       tmp = const0_rtx;
-      else
-       tmp = gen_rtx_NOT (DImode, tmp);
-    }
-  else
-    {
-      operands[0] = gen_highpart (SImode, operands[0]);
-      if (GET_CODE (operands[1]) == ABS)
-       {
-         tmp = gen_int_mode (0x7fffffff, SImode);
-         tmp = gen_rtx_AND (SImode, operands[0], tmp);
-       }
-      else
-       {
-         tmp = gen_int_mode (0x80000000, SImode);
-         tmp = gen_rtx_XOR (SImode, operands[0], tmp);
-       }
-    }
-  operands[1] = tmp;
-})
+  "TARGET_80387 && reload_completed"
+  [(set (match_dup 0) (absneg:MODEF (match_dup 1)))])
 
 (define_split
-  [(set (match_operand:XF 0 "general_reg_operand")
-       (match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
+  [(set (match_operand:MODEF 0 "general_reg_operand")
+       (absneg:MODEF (match_operand:MODEF 1 "general_reg_operand")))
    (use (match_operand 2))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (match_dup 1))
-             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387 && reload_completed"
+  [(const_int 0)]
+  "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; MODEF negated absolute value, (neg (abs x)), for SSE math, carrying an
+;; extra (use ...) of an <ssevecmode> vector operand (operand 2) and no
+;; flags clobber.  It is always output as "#", so it must be split before
+;; final output.  The "isa" attribute marks alternatives 0-1 as noavx and
+;; alternative 2 as avx.
+(define_insn "*nabs<mode>2_1"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv")
+       (neg:MODEF
+         (abs:MODEF
+           (match_operand:MODEF 1 "register_operand" "0,x,Yv"))))
+   (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm"))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "#"
+  [(set_attr "isa" "noavx,noavx,avx")])
+
+(define_split
+  [(set (match_operand:SSEMODEF 0 "sse_reg_operand")
+       (neg:SSEMODEF
+         (abs:SSEMODEF
+           (match_operand:SSEMODEF 1 "vector_operand"))))
+   (use (match_operand:<ssevecmodef> 2 "vector_operand"))]
+  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    || (TARGET_SSE && (<MODE>mode == TFmode)))
+   && reload_completed"
+  [(set (match_dup 0) (match_dup 3))]
 {
-  rtx tmp;
-  operands[0] = gen_rtx_REG (SImode,
-                            REGNO (operands[0]) + (TARGET_64BIT ? 1 : 2));
-  if (GET_CODE (operands[1]) == ABS)
+  machine_mode mode = <MODE>mode;
+  machine_mode vmode = <ssevecmodef>mode;
+
+  operands[0] = lowpart_subreg (vmode, operands[0], mode);
+  operands[1] = lowpart_subreg (vmode, operands[1], mode);
+
+  if (TARGET_AVX)
     {
-      tmp = GEN_INT (0x7fff);
-      tmp = gen_rtx_AND (SImode, operands[0], tmp);
+      if (MEM_P (operands[1]))
+        std::swap (operands[1], operands[2]);
     }
   else
-    {
-      tmp = GEN_INT (0x8000);
-      tmp = gen_rtx_XOR (SImode, operands[0], tmp);
-    }
-  operands[1] = tmp;
+   {
+     if (operands_match_p (operands[0], operands[2]))
+       std::swap (operands[1], operands[2]);
+   }
+
+  operands[3]
+    = gen_rtx_fmt_ee (IOR, vmode, operands[1], operands[2]);
 })
 
 ;; Conditionalize these after reload. If they match before reload, we
 ;; lose the clobber and ability to use integer instructions.
 
-(define_insn "*<code><mode>2_1"
+;; x87 abs/neg of a register that is tied to the destination ("0").
+;; The pattern is only enabled once reload has completed.
+(define_insn "*<code><mode>2_i387"
   [(set (match_operand:X87MODEF 0 "register_operand" "=f")
        (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
-  "TARGET_80387
-   && (reload_completed
-       || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
-  "f<absneg_mnemonic>"
+  "TARGET_80387 && reload_completed"
+  "<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<code>extendsfdf2"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-       (absneg:DF (float_extend:DF
-                    (match_operand:SF 1 "register_operand" "0"))))]
-  "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
-  "f<absneg_mnemonic>"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "DF")])
-
-(define_insn "*<code>extend<mode>xf2"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (absneg:XF (float_extend:XF
-                    (match_operand:MODEF 1 "register_operand" "0"))))]
-  "TARGET_80387"
-  "f<absneg_mnemonic>"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "XF")])
-
 ;; Copysign instructions
 
-(define_mode_iterator CSGNMODE [SF DF TF])
-(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")])
-
 (define_expand "copysign<mode>3"
+  ;; Enabled for SSE float modes under SSE math, or for TFmode whenever SSE
+  ;; is available.  The expansion itself is done by ix86_expand_copysign.
-  [(match_operand:CSGNMODE 0 "register_operand")
-   (match_operand:CSGNMODE 1 "nonmemory_operand")
-   (match_operand:CSGNMODE 2 "register_operand")]
+  [(match_operand:SSEMODEF 0 "register_operand")
+   (match_operand:SSEMODEF 1 "nonmemory_operand")
+   (match_operand:SSEMODEF 2 "register_operand")]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE && (<MODE>mode == TFmode))"
   "ix86_expand_copysign (operands); DONE;")
 
 (define_insn_and_split "copysign<mode>3_const"
-  [(set (match_operand:CSGNMODE 0 "register_operand" "=Yv")
-       (unspec:CSGNMODE
-         [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "YvmC")
-          (match_operand:CSGNMODE 2 "register_operand" "0")
-          (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "Yvm")]
+  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")
+       (unspec:SSEMODEF
+         [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")
+          (match_operand:SSEMODEF 2 "register_operand" "0")
+          (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]
          UNSPEC_COPYSIGN))]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE && (<MODE>mode == TFmode))"
   "ix86_split_copysign_const (operands); DONE;")
 
 (define_insn "copysign<mode>3_var"
+  ;; Copysign placeholder (UNSPEC_COPYSIGN) with five constraint
+  ;; alternatives and a vector-mode scratch (operand 1).  The "#" template
+  ;; means no assembly is emitted directly; the insn has to be split.
-  [(set (match_operand:CSGNMODE 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")
-       (unspec:CSGNMODE
-         [(match_operand:CSGNMODE 2 "register_operand" "Yv,0,0,Yv,Yv")
-          (match_operand:CSGNMODE 3 "register_operand" "1,1,Yv,1,Yv")
-          (match_operand:<CSGNVMODE> 4
+  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")
+       (unspec:SSEMODEF
+         [(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv")
+          (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv")
+          (match_operand:<ssevecmodef> 4
             "nonimmediate_operand" "X,Yvm,Yvm,0,0")
-          (match_operand:<CSGNVMODE> 5
+          (match_operand:<ssevecmodef> 5
             "nonimmediate_operand" "0,Yvm,1,Yvm,1")]
          UNSPEC_COPYSIGN))
-   (clobber (match_scratch:<CSGNVMODE> 1 "=Yv,Yv,Yv,Yv,Yv"))]
+   (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE && (<MODE>mode == TFmode))"
   "#")
 
 (define_split
+  ;; Once reload has completed, lower the UNSPEC_COPYSIGN pattern with a
+  ;; scratch by calling ix86_split_copysign_var; the replacement template
+  ;; is just a dummy (const_int 0) because the C code ends with DONE.
-  [(set (match_operand:CSGNMODE 0 "register_operand")
-       (unspec:CSGNMODE
-         [(match_operand:CSGNMODE 2 "register_operand")
-          (match_operand:CSGNMODE 3 "register_operand")
-          (match_operand:<CSGNVMODE> 4)
-          (match_operand:<CSGNVMODE> 5)]
+  [(set (match_operand:SSEMODEF 0 "register_operand")
+       (unspec:SSEMODEF
+         [(match_operand:SSEMODEF 2 "register_operand")
+          (match_operand:SSEMODEF 3 "register_operand")
+          (match_operand:<ssevecmodef> 4)
+          (match_operand:<ssevecmodef> 5)]
          UNSPEC_COPYSIGN))
-   (clobber (match_scratch:<CSGNVMODE> 1))]
+   (clobber (match_scratch:<ssevecmodef> 1))]
   "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
     || (TARGET_SSE && (<MODE>mode == TFmode)))
    && reload_completed"
   [(const_int 0)]
   "ix86_split_copysign_var (operands); DONE;")
+
+;; Expander for xorsign on scalar float modes when SSE math is in use.
+;; All three operands must be registers; the expansion itself is done by
+;; ix86_expand_xorsign.
+(define_expand "xorsign<mode>3"
+  [(match_operand:MODEF 0 "register_operand")
+   (match_operand:MODEF 1 "register_operand")
+   (match_operand:MODEF 2 "register_operand")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "ix86_expand_xorsign (operands); DONE;")
+
+;; Placeholder insn for xorsign (UNSPEC_XORSIGN).  Operand 2 is tied to
+;; the destination; operand 3 is a vector-mode register or memory operand
+;; (presumably the sign-bit mask -- confirm in ix86_expand_xorsign).
+;; Emits nothing itself ("#"); after reload it is lowered by
+;; ix86_split_xorsign.
+(define_insn_and_split "xorsign<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand" "=Yv")
+       (unspec:MODEF
+         [(match_operand:MODEF 1 "register_operand" "Yv")
+          (match_operand:MODEF 2 "register_operand" "0")
+          (match_operand:<ssevecmode> 3 "nonimmediate_operand" "Yvm")]
+         UNSPEC_XORSIGN))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_xorsign (operands); DONE;")
 \f
 ;; One complement instructions
 
 (define_expand "one_cmpl<mode>2"
+  ;; Bitwise NOT expander, always enabled (empty condition); the work is
+  ;; delegated to ix86_expand_unary_operator.
-  [(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
-       (not:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")))]
+  [(set (match_operand:SWIM1248s 0 "nonimmediate_operand")
+       (not:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand")))]
   ""
   "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
 
 (define_insn_and_split "*one_cmpldi2_doubleword"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-       (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))]
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+       (not:DI (match_operand:DI 1 "nonimmediate_operand")))]
   "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
-   && ix86_unary_operator_ok (NOT, DImode, operands)"
+   && ix86_unary_operator_ok (NOT, DImode, operands)
+   && can_create_pseudo_p ()"
   "#"
-  "&& reload_completed"
+  "&& 1"
   [(set (match_dup 0)
        (not:SI (match_dup 1)))
    (set (match_dup 2)
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "ashrdi3_cvt"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
-       (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
-                    (match_operand:QI 2 "const_int_operand")))
+;; Sign-extension mnemonic ({AT&T|Intel} spelling) for each integer mode;
+;; used as the first output alternative of ashr<mode>3_cvt.
+(define_mode_attr cvt_mnemonic
+  [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
+
+;; Arithmetic right shift by (bit size of the mode - 1).  Alternative 0
+;; ("*a" source, "*d" destination) is emitted as <cvt_mnemonic>;
+;; alternative 1 is an ordinary sar with the source tied to the
+;; destination.  Only enabled with TARGET_USE_CLTD or when optimizing
+;; the function for size.
+(define_insn "ashr<mode>3_cvt"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm")
+       (ashiftrt:SWI48
+         (match_operand:SWI48 1 "nonimmediate_operand" "*a,0")
+         (match_operand:QI 2 "const_int_operand")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && INTVAL (operands[2]) == 63
+  "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
    && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
   "@
-   {cqto|cqo}
-   sar{q}\t{%2, %0|%0, %2}"
+   <cvt_mnemonic>
+   sar{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "imovx,ishift")
    (set_attr "prefix_0f" "0,*")
    (set_attr "length_immediate" "0,*")
    (set_attr "modrm" "0,1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashrsi3_cvt_zext"
   [(set (match_operand:DI 0 "register_operand" "=*d,r")
    (set_attr "modrm" "0,1")
    (set_attr "mode" "SI")])
 
-(define_insn "ashrsi3_cvt"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
-       (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
-                    (match_operand:QI 2 "const_int_operand")))
-   (clobber (reg:CC FLAGS_REG))]
-  "INTVAL (operands[2]) == 31
-   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "@
-   {cltd|cdq}
-   sar{l}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "imovx,ishift")
-   (set_attr "prefix_0f" "0,*")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "modrm" "0,1")
-   (set_attr "mode" "SI")])
-
 (define_expand "x86_shift<mode>_adj_3"
   [(use (match_operand:SWI48 0 "register_operand"))
    (use (match_operand:SWI48 1 "register_operand"))
   [(const_int 0)]
   "ix86_expand_epilogue (2); DONE;")
 
-(define_insn "leave"
+;; Mode-parameterized expander for the "leave" RTL: SP = BP + word size,
+;; BP = the word loaded from the address in BP, plus a clobber of all
+;; memory (mem:BLK (scratch)).  Operand 0 is filled in by the
+;; preparation statement with the size of the mode.
+(define_expand "@leave_<mode>"
+  [(parallel
+    [(set (reg:W SP_REG) (plus:W (reg:W BP_REG) (match_dup 0)))
+     (set (reg:W BP_REG) (mem:W (reg:W BP_REG)))
+     (clobber (mem:BLK (scratch)))])]
+  ""
+  "operands[0] = GEN_INT (<MODE_SIZE>);")
+
+;; SImode form of "leave": SP = BP + 4, BP = the word at the address in
+;; BP, with a clobber of all memory.  Emitted as the "leave" instruction.
+(define_insn "*leave"
   [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
    (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
    (clobber (mem:BLK (scratch)))]
   "leave"
   [(set_attr "type" "leave")])
 
-(define_insn "leave_rex64"
+(define_insn "*leave_rex64"
   [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
    (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
    (clobber (mem:BLK (scratch)))]
 (define_expand "bmi2_bzhi_<mode>3"
   [(parallel
     [(set (match_operand:SWI48 0 "register_operand")
-         (zero_extract:SWI48
-           (match_operand:SWI48 1 "nonimmediate_operand")
-           (umin:SWI48
-             (and:SWI48 (match_operand:SWI48 2 "register_operand")
-                        (const_int 255))
-             (match_dup 3))
+         (if_then_else:SWI48
+           (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand")
+                             (const_int 255))
+                  (const_int 0))
+           (zero_extract:SWI48
+             (match_operand:SWI48 1 "nonimmediate_operand")
+             (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
+                         (match_dup 3))
+             (const_int 0))
            (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_BMI2"
 
 (define_insn "*bmi2_bzhi_<mode>3"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
-       (zero_extract:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand" "rm")
-         (umin:SWI48
-           (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
-                      (const_int 255))
-           (match_operand:SWI48 3 "const_int_operand" "n"))
+       (if_then_else:SWI48
+         (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
+                           (const_int 255))
+                (const_int 0))
+         (zero_extract:SWI48
+           (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+           (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
+                       (match_operand:SWI48 3 "const_int_operand" "n"))
+           (const_int 0))
          (const_int 0)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
 
 (define_insn "*bmi2_bzhi_<mode>3_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
-       (zero_extract:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand" "rm")
-         (umin:SWI48
-           (zero_extend:SWI48 (match_operand:QI 2 "register_operand" "r"))
-           (match_operand:SWI48 3 "const_int_operand" "n"))
+       (if_then_else:SWI48
+         (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
+         (zero_extract:SWI48
+           (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+           (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
+                       (match_operand:SWI48 3 "const_int_operand" "n"))
+           (const_int 0))
          (const_int 0)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
 (define_insn "*bmi2_bzhi_<mode>3_1_ccz"
   [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
-         (zero_extract:SWI48
-           (match_operand:SWI48 1 "nonimmediate_operand" "rm")
-           (umin:SWI48
-             (zero_extend:SWI48 (match_operand:QI 2 "register_operand" "r"))
-             (match_operand:SWI48 3 "const_int_operand" "n"))
+         (if_then_else:SWI48
+           (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
+           (zero_extract:SWI48
+             (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+             (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
+                         (match_operand:SWI48 3 "const_int_operand" "n"))
+             (const_int 0))
            (const_int 0))
        (const_int 0)))
    (clobber (match_scratch:SWI48 0 "=r"))]
    (set_attr "mode" "<MODE>")])
 
 ;; TBM instructions.
-(define_insn "tbm_bextri_<mode>"
+;; Expander for the TBM bit-field extract with constant field
+;; description.  In the zero_extract, operand 2 is the field size and
+;; operand 3 the start position, each restricted to 0..255 by the
+;; predicate.  The preparation code normalizes out-of-range fields
+;; before the pattern is emitted.
+(define_expand "tbm_bextri_<mode>"
+  [(parallel
+    [(set (match_operand:SWI48 0 "register_operand")
+         (zero_extract:SWI48
+           (match_operand:SWI48 1 "nonimmediate_operand")
+           (match_operand 2 "const_0_to_255_operand" "N")
+           (match_operand 3 "const_0_to_255_operand" "N")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_TBM"
+{
+  /* A zero-sized field, or a field starting at or beyond the width of
+     the mode, yields zero: emit a plain move of 0 instead.  */
+  if (operands[2] == const0_rtx
+      || INTVAL (operands[3]) >= <MODE_SIZE> * BITS_PER_UNIT)
+    {
+      emit_move_insn (operands[0], const0_rtx);
+      DONE;
+    }
+  /* Clamp the size so that the field ends at the top bit of the mode.  */
+  if (INTVAL (operands[2]) + INTVAL (operands[3])
+      > <MODE_SIZE> * BITS_PER_UNIT)
+    operands[2] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - INTVAL (operands[3]));
+})
+
+(define_insn "*tbm_bextri_<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
         (zero_extract:SWI48
           (match_operand:SWI48 1 "nonimmediate_operand" "rm")
   [(set_attr "type" "multi")
    (set_attr "length" "22")])
 
-(define_expand "tls_global_dynamic_64_<mode>"
+(define_expand "@tls_global_dynamic_64_<mode>"
   [(parallel
     [(set (match_operand:P 0 "register_operand")
          (call:P
   [(set_attr "type" "multi")
    (set_attr "length" "22")])
 
-(define_expand "tls_local_dynamic_base_64_<mode>"
+(define_expand "@tls_local_dynamic_base_64_<mode>"
   [(parallel
      [(set (match_operand:P 0 "register_operand")
           (call:P
 ;; Gcc is slightly more smart about handling normal two address instructions
 ;; so use special patterns for add and mull.
 
+;; Commutative XFmode binary x87 operation on two registers (the insn
+;; condition requires COMMUTATIVE_ARITH_P, and operand 1 carries the "%"
+;; commutativity marker).  The "type" attribute is "fmul" for a
+;; multiplication and "fop" otherwise.
+(define_insn "*fop_xf_comm_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+       (match_operator:XF 3 "binary_fp_operator"
+                       [(match_operand:XF 1 "register_operand" "%0")
+                        (match_operand:XF 2 "register_operand" "f")]))]
+  "TARGET_80387
+   && COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+       (if_then_else (match_operand:XF 3 "mult_operator")
+          (const_string "fmul")
+          (const_string "fop")))
+   (set_attr "mode" "XF")])
+
 (define_insn "*fop_<mode>_comm"
   [(set (match_operand:MODEF 0 "register_operand" "=f,x,v")
        (match_operator:MODEF 3 "binary_fp_operator"
         (symbol_ref "false"))))])
 
 (define_insn "*rcpsf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+  ;; Scalar SF reciprocal (UNSPEC_RCP) via rcpss.  Three alternatives:
+  ;; source tied to the destination, source in another register, and
+  ;; source in memory.  "preferred_for_speed" restricts alternative 1 to
+  ;; TARGET_AVX or targets without TARGET_SSE_PARTIAL_REG_DEPENDENCY, and
+  ;; alternative 2 to targets without TARGET_SSE_PARTIAL_REG_DEPENDENCY.
+  [(set (match_operand:SF 0 "register_operand" "=x,x,x")
+       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
                   UNSPEC_RCP))]
   "TARGET_SSE && TARGET_SSE_MATH"
   "@
+   %vrcpss\t{%d1, %0|%0, %d1}
    %vrcpss\t{%d1, %0|%0, %d1}
    %vrcpss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "atom_sse_attr" "rcp")
    (set_attr "btver2_sse_attr" "rcp")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "SF")
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "1")
+             (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+          (eq_attr "alternative" "2")
+             (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+          ]
+          (symbol_ref "true")))])
+
+;; Non-commutative XFmode binary x87 operation on two registers (the insn
+;; condition requires !COMMUTATIVE_ARITH_P).  Either input may be the one
+;; tied to the destination.  The "type" attribute is "fdiv" for a
+;; division and "fop" otherwise.
+(define_insn "*fop_xf_1_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+       (match_operator:XF 3 "binary_fp_operator"
+                       [(match_operand:XF 1 "register_operand" "0,f")
+                        (match_operand:XF 2 "register_operand" "f,0")]))]
+  "TARGET_80387
+   && !COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+       (if_then_else (match_operand:XF 3 "div_operator")
+          (const_string "fdiv")
+          (const_string "fop")))
+   (set_attr "mode" "XF")])
 
 (define_insn "*fop_<mode>_1"
   [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
         (symbol_ref "true")
         (symbol_ref "false"))))])
 
-;; ??? Add SSE splitters for these!
-(define_insn "*fop_<MODEF:mode>_2_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
-       (match_operator:MODEF 3 "binary_fp_operator"
-         [(float:MODEF
+;; Binary x87 operation whose FIRST input is an integer (SWI24) memory
+;; operand converted to floating point, and whose second input is the
+;; register tied to the destination.  Not used when the float mode is
+;; handled by SSE math; requires TARGET_USE_<mode>MODE_FIOP or
+;; optimizing the function for size.  The "mode" attribute is the
+;; integer operand's mode.
+(define_insn "*fop_<X87MODEF:mode>_2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+       (match_operator:X87MODEF 3 "binary_fp_operator"
+         [(float:X87MODEF
             (match_operand:SWI24 1 "nonimmediate_operand" "m"))
-          (match_operand:MODEF 2 "register_operand" "0")]))]
-  "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI24:MODE>mode)
-   && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+          (match_operand:X87MODEF 2 "register_operand" "0")]))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
+   && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
    && (TARGET_USE_<SWI24:MODE>MODE_FIOP
        || optimize_function_for_size_p (cfun))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:MODEF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:MODEF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+       (cond [(match_operand:X87MODEF 3 "mult_operator")
+                (const_string "fmul")
+              (match_operand:X87MODEF 3 "div_operator")
+                (const_string "fdiv")
+             ]
+             (const_string "fop")))
    (set_attr "fp_int_src" "true")
    (set_attr "mode" "<SWI24:MODE>")])
 
-(define_insn "*fop_<MODEF:mode>_3_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f")
-       (match_operator:MODEF 3 "binary_fp_operator"
-         [(match_operand:MODEF 1 "register_operand" "0")
-          (float:MODEF
+;; Binary x87 operation whose SECOND input is an integer (SWI24) memory
+;; operand converted to floating point, and whose first input is the
+;; register tied to the destination.  Not used when the float mode is
+;; handled by SSE math; requires TARGET_USE_<mode>MODE_FIOP or
+;; optimizing the function for size.
+;; The pattern iterates over both X87MODEF and SWI24, so the "mode"
+;; attribute is spelled <SWI24:MODE> explicitly (the integer operand's
+;; mode, as in the sibling _2 pattern) rather than the unqualified
+;; <MODE>, which is ambiguous with two iterators in play.
+(define_insn "*fop_<X87MODEF:mode>_3_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+       (match_operator:X87MODEF 3 "binary_fp_operator"
+         [(match_operand:X87MODEF 1 "register_operand" "0")
+          (float:X87MODEF
             (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
-  "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI24:MODE>mode)
-   && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
+   && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
    && (TARGET_USE_<SWI24:MODE>MODE_FIOP
        || optimize_function_for_size_p (cfun))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:MODEF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:MODEF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*fop_df_4_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (match_operator:DF 3 "binary_fp_operator"
-          [(float_extend:DF
-            (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
-           (match_operand:DF 2 "register_operand" "0,f")]))]
-  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
-   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:DF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "mode" "SF")])
-
-(define_insn "*fop_df_5_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (match_operator:DF 3 "binary_fp_operator"
-         [(match_operand:DF 1 "register_operand" "0,f")
-          (float_extend:DF
-           (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
-  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
-   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:DF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "mode" "SF")])
-
-(define_insn "*fop_df_6_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (match_operator:DF 3 "binary_fp_operator"
-         [(float_extend:DF
-           (match_operand:SF 1 "register_operand" "0,f"))
-          (float_extend:DF
-           (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
-  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
-   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:DF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "mode" "SF")])
-
-(define_insn "*fop_xf_comm_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (match_operator:XF 3 "binary_fp_operator"
-                       [(match_operand:XF 1 "register_operand" "%0")
-                        (match_operand:XF 2 "register_operand" "f")]))]
-  "TARGET_80387
-   && COMMUTATIVE_ARITH_P (operands[3])"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (if_then_else (match_operand:XF 3 "mult_operator")
-           (const_string "fmul")
-           (const_string "fop")))
-   (set_attr "mode" "XF")])
-
-(define_insn "*fop_xf_1_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-       (match_operator:XF 3 "binary_fp_operator"
-                       [(match_operand:XF 1 "register_operand" "0,f")
-                        (match_operand:XF 2 "register_operand" "f,0")]))]
-  "TARGET_80387
-   && !COMMUTATIVE_ARITH_P (operands[3])"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (if_then_else (match_operand:XF 3 "div_operator")
-           (const_string "fdiv")
-           (const_string "fop")))
-   (set_attr "mode" "XF")])
-
-(define_insn "*fop_xf_2_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (match_operator:XF 3 "binary_fp_operator"
-         [(float:XF
-            (match_operand:SWI24 1 "nonimmediate_operand" "m"))
-          (match_operand:XF 2 "register_operand" "0")]))]
-  "TARGET_80387
-   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*fop_xf_3_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (match_operator:XF 3 "binary_fp_operator"
-         [(match_operand:XF 1 "register_operand" "0")
-          (float:XF
-            (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
-  "TARGET_80387
-   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+       (cond [(match_operand:X87MODEF 3 "mult_operator")
+                (const_string "fmul")
+              (match_operand:X87MODEF 3 "div_operator")
+                (const_string "fdiv")
+             ]
+             (const_string "fop")))
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<SWI24:MODE>")])
 
   "TARGET_80387"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "mode" "<MODE>")])
+       (cond [(match_operand:XF 3 "mult_operator")
+                (const_string "fmul")
+              (match_operand:XF 3 "div_operator")
+                (const_string "fdiv")
+             ]
+             (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+;; DFmode binary x87 operation whose FIRST input is an SFmode value
+;; (x87 register or memory) extended to DF; the second input is a DF
+;; register.  Disabled when DFmode is handled by SSE math.
+(define_insn "*fop_df_4_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+       (match_operator:DF 3 "binary_fp_operator"
+          [(float_extend:DF
+            (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+           (match_operand:DF 2 "register_operand" "0,f")]))]
+  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+       (cond [(match_operand:DF 3 "mult_operator")
+                (const_string "fmul")
+              (match_operand:DF 3 "div_operator")
+                (const_string "fdiv")
+             ]
+             (const_string "fop")))
+   (set_attr "mode" "SF")])
 
 (define_insn "*fop_xf_5_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
   "TARGET_80387"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+       (cond [(match_operand:XF 3 "mult_operator")
+                (const_string "fmul")
+              (match_operand:XF 3 "div_operator")
+                (const_string "fdiv")
+             ]
+             (const_string "fop")))
    (set_attr "mode" "<MODE>")])
 
+;; DFmode binary x87 operation whose SECOND input is an SFmode value
+;; (x87 register or memory) extended to DF; the first input is a DF
+;; register.  Disabled when DFmode is handled by SSE math.
+(define_insn "*fop_df_5_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+       (match_operator:DF 3 "binary_fp_operator"
+         [(match_operand:DF 1 "register_operand" "0,f")
+          (float_extend:DF
+           (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+       (cond [(match_operand:DF 3 "mult_operator")
+                (const_string "fmul")
+              (match_operand:DF 3 "div_operator")
+                (const_string "fdiv")
+             ]
+             (const_string "fop")))
+   (set_attr "mode" "SF")])
+
 (define_insn "*fop_xf_6_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
        (match_operator:XF 3 "binary_fp_operator"
   "TARGET_80387"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:XF 3 "mult_operator")
-                 (const_string "fmul")
-               (match_operand:XF 3 "div_operator")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
+       (cond [(match_operand:XF 3 "mult_operator")
+                (const_string "fmul")
+              (match_operand:XF 3 "div_operator")
+                (const_string "fdiv")
+             ]
+             (const_string "fop")))
    (set_attr "mode" "<MODE>")])
+
+;; DFmode binary x87 operation where BOTH inputs are SFmode values
+;; extended to DF: operand 1 is a register, operand 2 a register or
+;; memory.  Disabled when DFmode is handled by SSE math.
+(define_insn "*fop_df_6_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+       (match_operator:DF 3 "binary_fp_operator"
+         [(float_extend:DF
+           (match_operand:SF 1 "register_operand" "0,f"))
+          (float_extend:DF
+           (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+   && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+       (cond [(match_operand:DF 3 "mult_operator")
+                (const_string "fmul")
+              (match_operand:DF 3 "div_operator")
+                (const_string "fdiv")
+             ]
+             (const_string "fop")))
+   (set_attr "mode" "SF")])
 \f
 ;; FPU special functions.
 
    (set_attr "bdver1_decode" "direct")])
 
 (define_insn "*rsqrtsf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+  ;; Scalar SF reciprocal square root (UNSPEC_RSQRT) via rsqrtss.  Three
+  ;; alternatives: source tied to the destination, source in another
+  ;; register, and source in memory.  "preferred_for_speed" restricts
+  ;; alternative 1 to TARGET_AVX or targets without
+  ;; TARGET_SSE_PARTIAL_REG_DEPENDENCY, and alternative 2 to targets
+  ;; without TARGET_SSE_PARTIAL_REG_DEPENDENCY.
+  [(set (match_operand:SF 0 "register_operand" "=x,x,x")
+       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
                   UNSPEC_RSQRT))]
   "TARGET_SSE && TARGET_SSE_MATH"
   "@
+   %vrsqrtss\t{%d1, %0|%0, %d1}
    %vrsqrtss\t{%d1, %0|%0, %d1}
    %vrsqrtss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "atom_sse_attr" "rcp")
    (set_attr "btver2_sse_attr" "rcp")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "SF")
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "1")
+             (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+          (eq_attr "alternative" "2")
+             (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+          ]
+          (symbol_ref "true")))])
 
 (define_expand "rsqrtsf2"
   [(set (match_operand:SF 0 "register_operand")
 })
 
 (define_insn "*sqrt<mode>2_sse"
-  [(set (match_operand:MODEF 0 "register_operand" "=v,v")
+  ;; Scalar SSE square root.  Three alternatives: source tied to the
+  ;; destination, source in another register, and source in memory.
+  ;; "preferred_for_speed" restricts alternative 1 to TARGET_AVX or
+  ;; targets without TARGET_SSE_PARTIAL_REG_DEPENDENCY, and alternative 2
+  ;; to targets without TARGET_SSE_PARTIAL_REG_DEPENDENCY.
+  [(set (match_operand:MODEF 0 "register_operand" "=v,v,v")
        (sqrt:MODEF
-         (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
+         (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
   "@
+   %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
    %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
    %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "btver2_sse_attr" "sqrt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
-   (set_attr "athlon_decode" "*")
-   (set_attr "amdfam10_decode" "*")
-   (set_attr "bdver1_decode" "*")])
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "1")
+             (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+          (eq_attr "alternative" "2")
+             (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+          ]
+          (symbol_ref "true")))])
 
 (define_expand "sqrt<mode>2"
   [(set (match_operand:MODEF 0 "register_operand")
    }
 })
 
+;; hypot expander for the x87: computes sqrt (op1 * op1 + op2 * op2) in
+;; XFmode and truncates the result back to <mode>.  Only enabled with
+;; fancy x87 math, when <mode> is not handled exclusively by SSE math,
+;; and under both -ffinite-math-only and -funsafe-math-optimizations.
+(define_expand "hypot<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))
+   (use (match_operand:MODEF 2 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  /* Widen both inputs to XFmode.  */
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+  /* op0 = sqrt (op1 * op1 + op2 * op2).  */
+  emit_insn (gen_mulxf3 (op1, op1, op1));
+  emit_insn (gen_mulxf3 (op2, op2, op2));
+  emit_insn (gen_addxf3 (op0, op2, op1));
+  emit_insn (gen_sqrtxf2 (op0, op0));
+
+  /* Narrow the XFmode result back to the requested mode.  */
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+;; Copy the x87 status word into HImode operand 0 with "fnstsw".
+;; The source is modelled as (reg:CCFP FPSR_REG) wrapped in
+;; UNSPEC_FNSTSW; operand 0 is restricted to the "a" constraint.
+;; Requires TARGET_80387.
+(define_insn "x86_fnstsw_1"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+       (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
+  "TARGET_80387"
+  "fnstsw\t%0"
+  [(set_attr "length" "2")
+   (set_attr "mode" "SI")
+   (set_attr "unit" "i387")])
+
 (define_insn "fpremxf4_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")
                    (match_operand:XF 3 "register_operand" "1")]
                   UNSPEC_FPREM_F))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_dup 2) (match_dup 3)]
                   UNSPEC_FPREM_U))
    (set (reg:CCFP FPSR_REG)
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")
                    (match_operand:XF 3 "register_operand" "1")]
                   UNSPEC_FPREM1_F))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_dup 2) (match_dup 3)]
                   UNSPEC_FPREM1_U))
    (set (reg:CCFP FPSR_REG)
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
                   UNSPEC_SINCOS_COS))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
         (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 (define_insn "fptanxf4_i387"
   [(set (match_operand:SF 0 "register_operand" "=f")
        (match_operand:SF 3 "const1_operand"))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
                   UNSPEC_TAN))]
   "TARGET_USE_FANCY_MATH_387
 (define_insn "atan2xf3"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 1 "register_operand" "0")
-                   (match_operand:XF 2 "register_operand" "u")]
+                   (match_operand:XF 2 "register_operand" "f")]
                   UNSPEC_FPATAN))
    (clobber (match_scratch:XF 3 "=2"))]
   "TARGET_USE_FANCY_MATH_387
   DONE;
 })
 
+;; Expand XFmode sinh: operand 0 = sinh (operand 1).  All code generation
+;; is delegated to ix86_emit_i387_sinh, which is defined outside this
+;; file.  Enabled with TARGET_USE_FANCY_MATH_387, flag_finite_math_only
+;; and flag_unsafe_math_optimizations.
+(define_expand "sinhxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_sinh (operands[0], operands[1]);
+  DONE;
+})
+
+;; Expand sinh for MODEF operands through the XFmode expander: widen
+;; operand 1 to XFmode, run sinhxf2, narrow the result into operand 0.
+;; Same flag requirements as in the condition below; additionally
+;; <MODE>mode must not be done with SSE math unless TARGET_MIX_SSE_I387.
+(define_expand "sinh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  /* extend -> sinhxf2 -> truncate, all through fresh XFmode pseudos.  */
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_sinhxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+;; Expand XFmode cosh: operand 0 = cosh (operand 1).  All code generation
+;; is delegated to ix86_emit_i387_cosh, which is defined outside this
+;; file.  Enabled with TARGET_USE_FANCY_MATH_387 and
+;; flag_unsafe_math_optimizations.
+(define_expand "coshxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_cosh (operands[0], operands[1]);
+  DONE;
+})
+
+;; Expand cosh for MODEF operands through the XFmode expander: widen
+;; operand 1 to XFmode, run coshxf2, narrow the result into operand 0.
+;; <MODE>mode must not be done with SSE math unless TARGET_MIX_SSE_I387.
+(define_expand "cosh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  /* extend -> coshxf2 -> truncate, all through fresh XFmode pseudos.  */
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_coshxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+;; Expand XFmode tanh: operand 0 = tanh (operand 1).  All code generation
+;; is delegated to ix86_emit_i387_tanh, which is defined outside this
+;; file.  Enabled with TARGET_USE_FANCY_MATH_387 and
+;; flag_unsafe_math_optimizations.
+(define_expand "tanhxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_tanh (operands[0], operands[1]);
+  DONE;
+})
+
+;; Expand tanh for MODEF operands through the XFmode expander: widen
+;; operand 1 to XFmode, run tanhxf2, narrow the result into operand 0.
+;; <MODE>mode must not be done with SSE math unless TARGET_MIX_SSE_I387.
+(define_expand "tanh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  /* extend -> tanhxf2 -> truncate, all through fresh XFmode pseudos.  */
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_tanhxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+;; Expand XFmode asinh: operand 0 = asinh (operand 1).  All code
+;; generation is delegated to ix86_emit_i387_asinh, which is defined
+;; outside this file.  Enabled with TARGET_USE_FANCY_MATH_387,
+;; flag_finite_math_only and flag_unsafe_math_optimizations.
+(define_expand "asinhxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_asinh (operands[0], operands[1]);
+  DONE;
+})
+
+;; Expand asinh for MODEF operands through the XFmode expander: widen
+;; operand 1 to XFmode, run asinhxf2, narrow the result into operand 0.
+;; Requires flag_finite_math_only in addition to
+;; flag_unsafe_math_optimizations; <MODE>mode must not be done with SSE
+;; math unless TARGET_MIX_SSE_I387.
+(define_expand "asinh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  /* extend -> asinhxf2 -> truncate, all through fresh XFmode pseudos.  */
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_asinhxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+;; Expand XFmode acosh: operand 0 = acosh (operand 1).  All code
+;; generation is delegated to ix86_emit_i387_acosh, which is defined
+;; outside this file.  Enabled with TARGET_USE_FANCY_MATH_387 and
+;; flag_unsafe_math_optimizations.
+(define_expand "acoshxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_acosh (operands[0], operands[1]);
+  DONE;
+})
+
+;; Expand acosh for MODEF operands through the XFmode expander: widen
+;; operand 1 to XFmode, run acoshxf2, narrow the result into operand 0.
+;; <MODE>mode must not be done with SSE math unless TARGET_MIX_SSE_I387.
+(define_expand "acosh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  /* extend -> acoshxf2 -> truncate, all through fresh XFmode pseudos.  */
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_acoshxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+;; Expand XFmode atanh: operand 0 = atanh (operand 1).  All code
+;; generation is delegated to ix86_emit_i387_atanh, which is defined
+;; outside this file.  Enabled with TARGET_USE_FANCY_MATH_387 and
+;; flag_unsafe_math_optimizations.
+(define_expand "atanhxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_atanh (operands[0], operands[1]);
+  DONE;
+})
+
+;; Expand atanh for MODEF operands through the XFmode expander: widen
+;; operand 1 to XFmode, run atanhxf2, narrow the result into operand 0.
+;; <MODE>mode must not be done with SSE math unless TARGET_MIX_SSE_I387.
+(define_expand "atanh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  /* extend -> atanhxf2 -> truncate, all through fresh XFmode pseudos.  */
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_atanhxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
 (define_insn "fyl2xxf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 1 "register_operand" "0")
-                   (match_operand:XF 2 "register_operand" "u")]
+                   (match_operand:XF 2 "register_operand" "f")]
                   UNSPEC_FYL2X))
    (clobber (match_scratch:XF 3 "=2"))]
   "TARGET_USE_FANCY_MATH_387
 (define_insn "fyl2xp1xf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(match_operand:XF 1 "register_operand" "0")
-                   (match_operand:XF 2 "register_operand" "u")]
+                   (match_operand:XF 2 "register_operand" "f")]
                   UNSPEC_FYL2XP1))
    (clobber (match_scratch:XF 3 "=2"))]
   "TARGET_USE_FANCY_MATH_387
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
                   UNSPEC_XTRACT_FRACT))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
         (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")
                    (match_operand:XF 3 "register_operand" "1")]
                   UNSPEC_FSCALE_FRACT))
-   (set (match_operand:XF 1 "register_operand" "=u")
+   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_dup 2) (match_dup 3)]
                   UNSPEC_FSCALE_EXP))]
   "TARGET_USE_FANCY_MATH_387
 \f
 
 (define_insn "sse4_1_round<mode>2"
+;; Scalar rounding of MODEF operand 1 under the rounding-control
+;; immediate in operand 2 (const_0_to_15_operand), modelled as
+;; UNSPEC_ROUND.  The new form has four alternatives: 0 ties the input to
+;; the output, 1 takes a separate "x" register, 2 takes memory (all three
+;; emit %vround and are isa noavx512f); 3 emits vrndscale and is isa
+;; avx512f.
-  [(set (match_operand:MODEF 0 "register_operand" "=x,v")
-       (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm")
-                      (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
-                     UNSPEC_ROUND))]
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v")
+       (unspec:MODEF
+         [(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,vm")
+          (match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n")]
+         UNSPEC_ROUND))]
   "TARGET_SSE4_1"
   "@
+   %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
+   %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
    %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
    vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
   [(set_attr "type" "ssecvt")
-   (set_attr "prefix_extra" "1,*")
-   (set_attr "length_immediate" "*,1")
-   (set_attr "prefix" "maybe_vex,evex")
-   (set_attr "isa" "noavx512f,avx512f")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "prefix_extra" "1,1,1,*")
+   (set_attr "length_immediate" "*,*,*,1")
+   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex")
+   (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f")
+   (set_attr "mode" "<MODE>")
+   ;; preferred_for_speed: alternative 1 only when
+   ;; TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY, alternative 2 only
+   ;; when !TARGET_SSE_PARTIAL_REG_DEPENDENCY; alternatives 0 and 3 are
+   ;; always preferred.
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "1")
+             (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+          (eq_attr "alternative" "2")
+             (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+          ]
+          (symbol_ref "true")))])
 
 (define_insn "rintxf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
 (define_expand "rint<mode>2"
+;; Expand rint for MODEF operands.  With SSE math for <MODE>mode the
+;; expansion uses sse4_1_round<mode>2 with ROUND_MXCSR when TARGET_SSE4_1
+;; is set and ix86_expand_rint otherwise; without SSE math it goes
+;; through the x87 XFmode rintxf2 pattern.
   [(use (match_operand:MODEF 0 "register_operand"))
    (use (match_operand:MODEF 1 "nonimmediate_operand"))]
-  "TARGET_USE_FANCY_MATH_387
-   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-{
-  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-    {
-      if (TARGET_SSE4_1)
-       emit_insn (gen_sse4_1_round<mode>2
-                  (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
-      else
-       ix86_expand_rint (operands[0], operands[1]);
-    }
+  "TARGET_USE_FANCY_MATH_387
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    {
+      if (TARGET_SSE4_1)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
+      else
+       ix86_expand_rint (operands[0], operands[1]);
+    }
+  else
+    {
+      /* x87 path: widen to XFmode, round with rintxf2, then narrow with
+         the truncxf<mode>2_i387_noop_unspec pattern.  */
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
+
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_rintxf2 (op0, op1));
+      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
+    }
+  DONE;
+})
+
+;; XFmode nearbyint, expressed directly as an UNSPEC_FRNDINT of operand 1
+;; stored into operand 0 (no preparation statements).  Enabled with
+;; TARGET_USE_FANCY_MATH_387 and only when !flag_trapping_math.
+(define_expand "nearbyintxf2"
+  [(set (match_operand:XF 0 "register_operand")
+       (unspec:XF [(match_operand:XF 1 "register_operand")]
+                  UNSPEC_FRNDINT))]
+  "TARGET_USE_FANCY_MATH_387
+   && !flag_trapping_math")
+
+(define_expand "nearbyint<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+         || TARGET_MIX_SSE_I387)
+    && !flag_trapping_math)
+   || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
+{
+  if (TARGET_SSE4_1 && TARGET_SSE_MATH)
+    emit_insn (gen_sse4_1_round<mode>2
+              (operands[0], operands[1], GEN_INT (ROUND_MXCSR
+                                                  | ROUND_NO_EXC)));
   else
     {
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = gen_reg_rtx (XFmode);
 
       emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
-      emit_insn (gen_rintxf2 (op0, op1));
+      emit_insn (gen_nearbyintxf2 (op0, op1));
       emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
     }
   DONE;
   [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
        (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
                   UNSPEC_FIST))
-   (clobber (match_scratch:XF 2 "=&1f"))]
+   (clobber (match_scratch:XF 2 "=&f"))]
   "TARGET_USE_FANCY_MATH_387"
   "* return output_fix_trunc (insn, operands, false);"
   [(set_attr "type" "fpspc")
   DONE;
 })
 
-;; Rounding mode control word calculation could clobber FLAGS_REG.
-(define_insn_and_split "frndintxf2_mask_pm"
-  [(set (match_operand:XF 0 "register_operand")
-       (unspec:XF [(match_operand:XF 1 "register_operand")]
-                  UNSPEC_FRNDINT_MASK_PM))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  ix86_optimize_mode_switching[I387_MASK_PM] = 1;
-
-  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
-  operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
-
-  emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
-                                         operands[2], operands[3]));
-  DONE;
-}
-  [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "mask_pm")
-   (set_attr "mode" "XF")])
-
-(define_insn "frndintxf2_mask_pm_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-                  UNSPEC_FRNDINT_MASK_PM))
-   (use (match_operand:HI 2 "memory_operand" "m"))
-   (use (match_operand:HI 3 "memory_operand" "m"))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
-  "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
-  [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "mask_pm")
-   (set_attr "mode" "XF")])
-
-(define_expand "nearbyintxf2"
-  [(parallel [(set (match_operand:XF 0 "register_operand")
-                  (unspec:XF [(match_operand:XF 1 "register_operand")]
-                             UNSPEC_FRNDINT_MASK_PM))
-             (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations")
-
-(define_expand "nearbyint<mode>2"
-  [(use (match_operand:MODEF 0 "register_operand"))
-   (use (match_operand:MODEF 1 "register_operand"))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
-
-  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
-  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
-  DONE;
-})
-
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "*fist<mode>2_<rounding>_1"
   [(set (match_operand:SWI248x 0 "nonimmediate_operand")
                   FIST_ROUNDING))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))
-   (clobber (match_scratch:XF 4 "=&1f"))]
+   (clobber (match_scratch:XF 4 "=&f"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
   "* return output_fix_trunc (insn, operands, false);"
    (set_attr "unit" "i387")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "fxam<mode>2_i387_with_temp"
-  [(set (match_operand:HI 0 "register_operand")
-       (unspec:HI
-         [(match_operand:MODEF 1 "memory_operand")]
-         UNSPEC_FXAM_MEM))]
-  "TARGET_USE_FANCY_MATH_387
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(set (match_dup 2)(match_dup 1))
-   (set (match_dup 0)
-       (unspec:HI [(match_dup 2)] UNSPEC_FXAM))]
-{
-  operands[2] = gen_reg_rtx (<MODE>mode);
-
-  MEM_VOLATILE_P (operands[1]) = 1;
-}
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387")
-   (set_attr "mode" "<MODE>")])
-
-(define_expand "isinfxf2"
-  [(use (match_operand:SI 0 "register_operand"))
-   (use (match_operand:XF 1 "register_operand"))]
-  "TARGET_USE_FANCY_MATH_387
-   && ix86_libc_has_function (function_c99_misc)"
-{
-  rtx mask = GEN_INT (0x45);
-  rtx val = GEN_INT (0x05);
-
-  rtx scratch = gen_reg_rtx (HImode);
-  rtx res = gen_reg_rtx (QImode);
-
-  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
-
-  emit_insn (gen_andqi_ext_1 (scratch, scratch, mask));
-  emit_insn (gen_cmpqi_ext_3 (scratch, val));
-  ix86_expand_setcc (res, EQ,
-                    gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
-  emit_insn (gen_zero_extendqisi2 (operands[0], res));
-  DONE;
-})
-
-(define_expand "isinf<mode>2"
-  [(use (match_operand:SI 0 "register_operand"))
-   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
-  "TARGET_USE_FANCY_MATH_387
-   && ix86_libc_has_function (function_c99_misc)
-   && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-{
-  rtx mask = GEN_INT (0x45);
-  rtx val = GEN_INT (0x05);
-
-  rtx scratch = gen_reg_rtx (HImode);
-  rtx res = gen_reg_rtx (QImode);
-
-  /* Remove excess precision by forcing value through memory. */
-  if (memory_operand (operands[1], VOIDmode))
-    emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, operands[1]));
-  else
-    {
-      rtx temp = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
-
-      emit_move_insn (temp, operands[1]);
-      emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, temp));
-    }
-
-  emit_insn (gen_andqi_ext_1 (scratch, scratch, mask));
-  emit_insn (gen_cmpqi_ext_3 (scratch, val));
-  ix86_expand_setcc (res, EQ,
-                    gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
-  emit_insn (gen_zero_extendqisi2 (operands[0], res));
-  DONE;
-})
-
 (define_expand "signbittf2"
   [(use (match_operand:SI 0 "register_operand"))
    (use (match_operand:TF 1 "register_operand"))]
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "0")])
 
+;; Match operand 0 = -(operand 1 <=u operand 2) for a CONST_INT operand 2
+;; and always split it ("#" output, empty split condition) into a
+;; compare of operand 1 against operand 2 + 1 followed by the negated
+;; ltu-on-flags form.  The insn condition rejects the constants -1 and
+;; 2147483647.
+;; NOTE(review): presumably because adding 1 to those values would wrap
+;; or leave the immediate range -- confirm against the commit message.
+(define_insn_and_split "*x86_mov<SWI48:mode>cc_0_m1_neg_leu<SWI:mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+       (neg:SWI48
+         (leu:SWI48
+           (match_operand:SWI 1 "nonimmediate_operand" "<SWI:r>m")
+           (match_operand:SWI 2 "<SWI:immediate_operand>" "<SWI:i>"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) != -1
+   && INTVAL (operands[2]) != 2147483647"
+  "#"
+  ""
+  [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
+   (parallel [(set (match_dup 0)
+                  (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))))
+             (clobber (reg:CC FLAGS_REG))])]
+  ;; Replace the constant C by C + 1 so that the (match_dup 2) in the
+  ;; emitted compare uses the incremented value (x <=u C  ==>  x <u C + 1).
+  "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")
+
 (define_insn "*mov<mode>cc_noc"
   [(set (match_operand:SWI248 0 "register_operand" "=r,r")
        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
 ;;
 ;; in proper program order.
 
-(define_insn "pro_epilogue_adjust_stack_<mode>_add"
+(define_insn "@pro_epilogue_adjust_stack_add_<mode>"
   [(set (match_operand:P 0 "register_operand" "=r,r")
        (plus:P (match_operand:P 1 "register_operand" "0,r")
                (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
              (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "pro_epilogue_adjust_stack_<mode>_sub"
+(define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
        (minus:P (match_operand:P 1 "register_operand" "0")
                 (match_operand:P 2 "register_operand" "r")))
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "allocate_stack_worker_probe_<mode>"
+(define_insn "@allocate_stack_worker_probe_<mode>"
   [(set (match_operand:P 0 "register_operand" "=a")
        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
                            UNSPECV_STACK_PROBE))
     x = operands[1];
   else
     {
-      rtx (*insn) (rtx, rtx);
-
       x = copy_to_mode_reg (Pmode, operands[1]);
 
-      insn = (TARGET_64BIT
-             ? gen_allocate_stack_worker_probe_di
-             : gen_allocate_stack_worker_probe_si);
-
-      emit_insn (insn (x, x));
+      emit_insn (gen_allocate_stack_worker_probe (Pmode, x, x));
     }
 
   x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
   [(match_operand 0 "memory_operand")]
   ""
 {
-  rtx (*insn) (rtx, rtx)
-    = (GET_MODE (operands[0]) == DImode
-       ? gen_probe_stack_di : gen_probe_stack_si);
-
-  emit_insn (insn (operands[0], const0_rtx));
+  emit_insn (gen_probe_stack_1
+            (word_mode, operands[0], const0_rtx));
   DONE;
 })
 
 ;; Use OR for stack probes, this is shorter.
-(define_insn "probe_stack_<mode>"
+(define_insn "@probe_stack_1_<mode>"
   [(set (match_operand:W 0 "memory_operand" "=m")
        (unspec:W [(match_operand:W 1 "const0_operand")]
                  UNSPEC_PROBE_STACK))
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "1")])
   
-(define_insn "adjust_stack_and_probe<mode>"
+(define_insn "@adjust_stack_and_probe_<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
                            UNSPECV_PROBE_STACK_RANGE))
   "* return output_adjust_stack_and_probe (operands[0]);"
   [(set_attr "type" "multi")])
 
-(define_insn "probe_stack_range<mode>"
+(define_insn "@probe_stack_range_<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
                            (match_operand:P 2 "const_int_operand" "n")]
 (define_peephole2
   [(set (match_operand:SWI 0 "register_operand")
        (match_operand:SWI 1 "memory_operand"))
-   (set (match_operand:SWI 3 "register_operand")
-       (plus:SWI (match_dup 0)
-                 (match_operand:SWI 2 "<nonmemory_operand>")))
-   (set (match_dup 1) (match_dup 3))
-   (set (reg FLAGS_REG) (compare (match_dup 3) (const_int 0)))]
+   (set (match_operand:<LEAMODE> 3 "register_operand")
+       (plus:<LEAMODE> (match_operand:<LEAMODE> 4 "register_operand")
+                       (match_operand:<LEAMODE> 2 "<nonmemory_operand>")))
+   (set (match_dup 1) (match_operand:SWI 5 "register_operand"))
+   (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
   "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REGNO (operands[4]) == REGNO (operands[0])
+   && REGNO (operands[5]) == REGNO (operands[3])
    && peep2_reg_dead_p (4, operands[3])
-   && (rtx_equal_p (operands[0], operands[3])
+   && ((REGNO (operands[0]) == REGNO (operands[3]))
        || peep2_reg_dead_p (2, operands[0]))
    && !reg_overlap_mentioned_p (operands[0], operands[1])
    && !reg_overlap_mentioned_p (operands[3], operands[1])
        || immediate_operand (operands[2], QImode)
        || any_QIreg_operand (operands[2], QImode))
    && ix86_match_ccmode (peep2_next_insn (3), CCGOCmode)"
-  [(parallel [(set (match_dup 4) (match_dup 6))
-             (set (match_dup 1) (match_dup 5))])]
+  [(parallel [(set (match_dup 6) (match_dup 8))
+             (set (match_dup 1) (match_dup 7))])]
 {
-  operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
-  operands[5]
+  operands[6] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  operands[7]
     = gen_rtx_PLUS (<MODE>mode,
                    copy_rtx (operands[1]),
-                   operands[2]);
-  operands[6]
-    = gen_rtx_COMPARE (GET_MODE (operands[4]),
-                      copy_rtx (operands[5]),
+                   gen_lowpart (<MODE>mode, operands[2]));
+  operands[8]
+    = gen_rtx_COMPARE (GET_MODE (operands[6]),
+                      copy_rtx (operands[7]),
                       const0_rtx);
 })
 
                         (GET_CODE (operands[3]) == PLUS
                          || GET_CODE (operands[3]) == MINUS)
                         ? CCGOCmode : CCNOmode)"
-  [(parallel [(set (match_dup 4) (match_dup 6))
-             (set (match_dup 1) (match_dup 5))])]
+  [(parallel [(set (match_dup 5) (match_dup 7))
+             (set (match_dup 1) (match_dup 6))])]
 {
-  operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
-  operands[5]
+  operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  operands[6]
     = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
                      copy_rtx (operands[1]),
                      gen_lowpart (<MODE>mode, operands[2]));
+  operands[7]
+    = gen_rtx_COMPARE (GET_MODE (operands[5]),
+                      copy_rtx (operands[6]),
+                      const0_rtx);
+})
+
+;; peephole2 comes before regcprop, so deal also with a case that
+;; would be cleaned up by regcprop.
+;;
+;; Matched sequence (five insns):
+;;   reg0 = mem1
+;;   reg0 = reg0 <op3> operand2      (clobbers flags)
+;;   reg4 = reg0
+;;   mem1 = reg4
+;;   flags = compare (reg4, 0)
+;; Replacement: one parallel that sets the flags destination to
+;; compare (mem1 <op3> operand2, 0) and stores mem1 <op3> operand2 back
+;; into mem1.  The condition requires reg0 and reg4 to be dead (checked
+;; with peep2_reg_dead_p at offsets 3 and 5), no overlap between the
+;; registers and the memory/second operand, and a flags mode accepted by
+;; ix86_match_ccmode (CCGOCmode for PLUS/MINUS, CCNOmode otherwise).
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+       (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_dup 0)
+                  (match_operator:SWI 3 "plusminuslogic_operator"
+                    [(match_dup 0)
+                     (match_operand:SWI 2 "<nonmemory_operand>")]))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SWI 4 "register_operand") (match_dup 0))
+   (set (match_dup 1) (match_dup 4))
+   (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && peep2_reg_dead_p (5, operands[4])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[4], operands[1])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], QImode)
+       || any_QIreg_operand (operands[2], QImode))
+   && ix86_match_ccmode (peep2_next_insn (4),
+                        (GET_CODE (operands[3]) == PLUS
+                         || GET_CODE (operands[3]) == MINUS)
+                        ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 5) (match_dup 7))
+             (set (match_dup 1) (match_dup 6))])]
+{
+  /* operands[5]: the flags destination of the matched compare insn.  */
+  operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
   operands[6]
-    = gen_rtx_COMPARE (GET_MODE (operands[4]),
-                      copy_rtx (operands[5]),
+    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
+                     copy_rtx (operands[1]),
+                     operands[2]);
+  /* operands[7]: compare of the memory-operand arithmetic against 0.  */
+  operands[7]
+    = gen_rtx_COMPARE (GET_MODE (operands[5]),
+                      copy_rtx (operands[6]),
+                      const0_rtx);
+})
+
+;; SWI12 variant of the load / operate / copy / store / compare
+;; peephole in which the arithmetic insn was performed in SImode on
+;; register 4, which must have the same REGNO as the loaded register 0.
+;; The replacement redoes the operation in <MODE>mode directly on the
+;; memory operand, using the <MODE>mode low part of operand 2.
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand")
+       (match_operand:SWI12 1 "memory_operand"))
+   (parallel [(set (match_operand:SI 4 "register_operand")
+                  (match_operator:SI 3 "plusminuslogic_operator"
+                    [(match_dup 4)
+                     (match_operand:SI 2 "nonmemory_operand")]))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
+   (set (match_dup 1) (match_dup 5))
+   (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REGNO (operands[0]) == REGNO (operands[4])
+   && peep2_reg_dead_p (3, operands[0])
+   && peep2_reg_dead_p (5, operands[5])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], SImode)
+       || any_QIreg_operand (operands[2], SImode))
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[5], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (4),
+                        (GET_CODE (operands[3]) == PLUS
+                         || GET_CODE (operands[3]) == MINUS)
+                        ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 6) (match_dup 8))
+             (set (match_dup 1) (match_dup 7))])]
+{
+  /* operands[6]: the flags destination of the matched compare insn.  */
+  operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
+  /* operands[7]: the same operator as operands[3], but in <MODE>mode and
+     applied to the memory operand and the low part of operands[2].  */
+  operands[7]
+    = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
+                     copy_rtx (operands[1]),
+                     gen_lowpart (<MODE>mode, operands[2]));
+  /* operands[8]: compare of that result against 0.  */
+  operands[8]
+    = gen_rtx_COMPARE (GET_MODE (operands[6]),
+                      copy_rtx (operands[7]),
+                      const0_rtx);
+})
+
+;; Likewise for cmpelim optimized pattern.
+;;
+;; Matched sequence (four insns):
+;;   reg0 = mem1
+;;   parallel: flags = compare (reg0 <op3> operand2, 0); reg0 = <op3>
+;;   reg4 = reg0
+;;   mem1 = reg4
+;; Here the flags are already set by the arithmetic insn, so the flags
+;; destination and its mode are taken from the first element of that
+;; parallel (insn at offset 1) rather than from a separate compare.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+       (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (reg FLAGS_REG)
+                  (compare (match_operator:SWI 3 "plusminuslogic_operator"
+                             [(match_dup 0)
+                              (match_operand:SWI 2 "<nonmemory_operand>")])
+                           (const_int 0)))
+             (set (match_dup 0) (match_dup 3))])
+   (set (match_operand:SWI 4 "register_operand") (match_dup 0))
+   (set (match_dup 1) (match_dup 4))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && peep2_reg_dead_p (4, operands[4])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[4], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (1),
+                        (GET_CODE (operands[3]) == PLUS
+                         || GET_CODE (operands[3]) == MINUS)
+                        ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 5) (match_dup 7))
+             (set (match_dup 1) (match_dup 6))])]
+{
+  /* operands[5]: flags destination from element 0 of the parallel.  */
+  operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
+  /* operands[6]: the matched operator applied to the memory operand.  */
+  operands[6]
+    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
+                     copy_rtx (operands[1]), operands[2]);
+  /* operands[7]: compare of that result against 0.  */
+  operands[7]
+    = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
+                      const0_rtx);
+})
+
+;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
+;; into x = z; x ^= y; x != z
+;;
+;; Matched sequence (five insns):
+;;   reg0 = mem1
+;;   reg3 = reg0
+;;   reg4 = reg4 ^ operand2          (clobbers flags)
+;;   mem1 = reg4
+;;   flags:CCZ = compare (operand5, operand6)
+;; The condition requires reg4 to be reg0 or reg3 (by REGNO), and the
+;; final compare to be between the other copy (the one that was not
+;; xored) and operand2, in either operand order.  Both the xored register
+;; and the other copy must be dead (peep2_reg_dead_p at offsets 4 and 5).
+;; Replacement: flags = compare (mem1 ^ operand2, 0) in parallel with
+;; mem1 = mem1 ^ operand2.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+       (match_operand:SWI 1 "memory_operand"))
+   (set (match_operand:SWI 3 "register_operand") (match_dup 0))
+   (parallel [(set (match_operand:SWI 4 "register_operand")
+                  (xor:SWI (match_dup 4)
+                           (match_operand:SWI 2 "<nonmemory_operand>")))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 4))
+   (set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (match_operand:SWI 5 "register_operand")
+                    (match_operand:SWI 6 "<nonmemory_operand>")))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && (REGNO (operands[4]) == REGNO (operands[0])
+       || REGNO (operands[4]) == REGNO (operands[3]))
+   && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
+                            ? 3 : 0], operands[5])
+       ? rtx_equal_p (operands[2], operands[6])
+       : rtx_equal_p (operands[2], operands[5])
+        && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
+                                 ? 3 : 0], operands[6]))
+   && peep2_reg_dead_p (4, operands[4])
+   && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
+                                   ? 3 : 0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[3], operands[0])
+   && !reg_overlap_mentioned_p (operands[3], operands[1])
+   && !reg_overlap_mentioned_p (operands[3], operands[2])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], QImode)
+       || any_QIreg_operand (operands[2], QImode))"
+  [(parallel [(set (match_dup 7) (match_dup 9))
+             (set (match_dup 1) (match_dup 8))])]
+{
+  /* operands[7]: the CCZ flags destination of the matched compare.  */
+  operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));
+  /* operands[8]: the xor applied directly to the memory operand.  */
+  operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+                            operands[2]);
+  /* operands[9]: compare of that xor against 0.  */
+  operands[9]
+    = gen_rtx_COMPARE (GET_MODE (operands[7]),
+                      copy_rtx (operands[8]),
+                      const0_rtx);
+})
+
+;; Load / copy / xor / store / compare combination for the case where
+;; the load, the register copy, the store and the compare are in an
+;; SWI12 mode while the xor itself is done in SImode.
+;; Operand 5 (the register stored back to memory 1) must have the same
+;; REGNO as the SImode xor register 4 and as register 0 or register 3.
+;; The compare must pair the other one of registers 0/3 with operand 2,
+;; in either operand order; when operand 2 is a register it is matched
+;; by REGNO rather than rtx_equal_p (presumably because operand 2 is
+;; SImode while the compare operands are <MODE>mode).
+;; The replacement is one parallel that xors the <MODE>mode low part of
+;; operand 2 directly into memory 1 and sets the flags destination of
+;; the matched compare from comparing that same xor with zero.
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand")
+       (match_operand:SWI12 1 "memory_operand"))
+   (set (match_operand:SWI12 3 "register_operand") (match_dup 0))
+   (parallel [(set (match_operand:SI 4 "register_operand")
+                  (xor:SI (match_dup 4)
+                          (match_operand:SI 2 "<nonmemory_operand>")))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
+   (set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (match_operand:SWI12 6 "register_operand")
+                    (match_operand:SWI12 7 "<nonmemory_operand>")))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && (REGNO (operands[5]) == REGNO (operands[0])
+       || REGNO (operands[5]) == REGNO (operands[3]))
+   && REGNO (operands[5]) == REGNO (operands[4])
+   && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
+                            ? 3 : 0], operands[6])
+       ? (REG_P (operands[2])
+         ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
+         : rtx_equal_p (operands[2], operands[7]))
+       : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
+                               ? 3 : 0], operands[7])
+         && REG_P (operands[2])
+         && REGNO (operands[2]) == REGNO (operands[6])))
+   && peep2_reg_dead_p (4, operands[5])
+   && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
+                                   ? 3 : 0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && !reg_overlap_mentioned_p (operands[3], operands[0])
+   && !reg_overlap_mentioned_p (operands[3], operands[1])
+   && !reg_overlap_mentioned_p (operands[3], operands[2])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], SImode)
+       || any_QIreg_operand (operands[2], SImode))"
+  [(parallel [(set (match_dup 8) (match_dup 10))
+             (set (match_dup 1) (match_dup 9))])]
+{
+  operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));
+  operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+                            gen_lowpart (<MODE>mode, operands[2]));
+  operands[10]
+    = gen_rtx_COMPARE (GET_MODE (operands[8]),
+                      copy_rtx (operands[9]),
                       const0_rtx);
 })
 
+;; Attempt to optimize away memory stores of values the memory already
+;; has.  See PR79593.
+;;
+;; Matches a load of register 0 from memory 1 followed by a store of
+;; register 0 to memory 2.  When neither memory reference is volatile,
+;; the two references are rtx_equal_p, and register 0 is not mentioned
+;; in memory 2, the pair is replaced by the load alone.
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+        (match_operand 1 "memory_operand"))
+   (set (match_operand 2 "memory_operand") (match_dup 0))]
+  "!MEM_VOLATILE_P (operands[1])
+   && !MEM_VOLATILE_P (operands[2])
+   && rtx_equal_p (operands[1], operands[2])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 1))])
+
 ;; Attempt to always use XOR for zeroing registers (including FP modes).
 (define_peephole2
   [(set (match_operand 0 "general_reg_operand")
 (define_expand "stack_protect_set"
+  ;; Takes two memory operands and passes them unchanged to the
+  ;; ptr_mode instance of stack_protect_set_1.  The expander is
+  ;; enabled unconditionally (empty condition string).
   [(match_operand 0 "memory_operand")
    (match_operand 1 "memory_operand")]
-  "TARGET_SSP_TLS_GUARD"
+  ""
 {
-  rtx (*insn)(rtx, rtx);
-
-  insn = (TARGET_LP64
-         ? gen_stack_protect_set_di
-         : gen_stack_protect_set_si);
-
-  emit_insn (insn (operands[0], operands[1]));
+  emit_insn (gen_stack_protect_set_1
+            (ptr_mode, operands[0], operands[1]));
   DONE;
 })
 
+;; Copy the PTR-mode memory operand 1 to memory operand 0 through the
+;; early-clobber scratch register 2, then zero the scratch with a
+;; 32-bit xor; the flags register is clobbered.  The leading "@" in
+;; the name makes the generator take the mode as its first argument.
-(define_insn "stack_protect_set_<mode>"
+(define_insn "@stack_protect_set_1_<mode>"
   [(set (match_operand:PTR 0 "memory_operand" "=m")
        (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
                    UNSPEC_SP_SET))
    (set (match_scratch:PTR 2 "=&r") (const_int 0))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSP_TLS_GUARD"
+  ""
   "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
   [(set_attr "type" "multi")])
 
   [(match_operand 0 "memory_operand")
    (match_operand 1 "memory_operand")
    (match_operand 2)]
-  "TARGET_SSP_TLS_GUARD"
+  ""
 {
   rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
 
-  rtx (*insn)(rtx, rtx, rtx);
-
-  insn = (TARGET_LP64
-         ? gen_stack_protect_test_di
-         : gen_stack_protect_test_si);
-
-  emit_insn (insn (flags, operands[0], operands[1]));
+  emit_insn (gen_stack_protect_test_1
+            (ptr_mode, flags, operands[0], operands[1]));
 
   emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
                                  flags, const0_rtx, operands[2]));
   DONE;
 })
 
+;; Set the CCZ flags operand 0 from the UNSPEC_SP_TEST of the two
+;; PTR-mode memory operands 1 and 2.  The emitted code loads operand 1
+;; into the early-clobber scratch register 3 and subtracts operand 2
+;; from it, so the scratch ends up zero exactly when the two memory
+;; values are equal.
-(define_insn "stack_protect_test_<mode>"
+(define_insn "@stack_protect_test_1_<mode>"
   [(set (match_operand:CCZ 0 "flags_reg_operand")
        (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
                     (match_operand:PTR 2 "memory_operand" "m")]
                    UNSPEC_SP_TEST))
    (clobber (match_scratch:PTR 3 "=&r"))]
-  "TARGET_SSP_TLS_GUARD"
-  "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%2, %3|%3, %2}"
+  ""
+  "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;sub{<imodesuffix>}\t{%2, %3|%3, %2}"
   [(set_attr "type" "multi")])
 
 (define_insn "sse4_2_crc32<mode>"
 (define_insn "fnstenv"
   [(set (match_operand:BLK 0 "memory_operand" "=m")
        (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
-   (clobber (reg:HI FPCR_REG))
    (clobber (reg:XF ST0_REG))
    (clobber (reg:XF ST1_REG))
    (clobber (reg:XF ST2_REG))
 (define_insn "fldenv"
   [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
                    UNSPECV_FLDENV)
-   (clobber (reg:CCFP FPSR_REG))
-   (clobber (reg:HI FPCR_REG))
    (clobber (reg:XF ST0_REG))
    (clobber (reg:XF ST1_REG))
    (clobber (reg:XF ST2_REG))
   [(set_attr "type" "other")
    (set_attr "prefix_extra" "2")])
 
+;; PTWRITE: emit "ptwrite" on one SWI48-mode register or memory
+;; operand.  Modelled as an unspec_volatile (UNSPECV_PTWRITE) with no
+;; output operand; only available under TARGET_PTWRITE.
+(define_insn "ptwrite<mode>"
+  [(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
+                   UNSPECV_PTWRITE)]
+  "TARGET_PTWRITE"
+  "ptwrite\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
 (define_insn "rdrand<mode>_1"
   [(set (match_operand:SWI248 0 "register_operand" "=r")
        (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
   "mwaitx"
   [(set_attr "length" "3")])
 
-(define_insn "monitorx_<mode>"
+(define_insn "@monitorx_<mode>"
   [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
      (symbol_ref ("(Pmode != word_mode) + 3")))])
 
 ;; CLZERO
-(define_insn "clzero_<mode>"
+(define_insn "@clzero_<mode>"
   [(unspec_volatile [(match_operand: P 0 "register_operand" "a")]
                    UNSPECV_CLZERO)]
   "TARGET_CLZERO"
   "movdiri\t{%1, %0|%0, %1}"
   [(set_attr "type" "other")])
 
-(define_insn "movdir64b_<mode>"
+(define_insn "@movdir64b_<mode>"
   [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r")
                        (match_operand:XI 1 "memory_operand")]
                       UNSPECV_MOVDIR64B)]
   "movdir64b\t{%1, %0|%0, %1}"
   [(set_attr "type" "other")])
 
+;; ENQCMD and ENQCMDS
+;;
+;; One pattern covers both instructions: the ENQCMD int iterator ranges
+;; over the two unspec_volatile codes, and enqcmd_sfx supplies the
+;; matching "" or "s" suffix for the pattern name and the mnemonic.
+;; Operand 0 is a P-mode register, operand 1 an XImode memory operand,
+;; and the result is delivered in the CCZ flags register.  Only
+;; available under TARGET_ENQCMD.
+
+(define_int_iterator ENQCMD [UNSPECV_ENQCMD UNSPECV_ENQCMDS])
+(define_int_attr enqcmd_sfx [(UNSPECV_ENQCMD "") (UNSPECV_ENQCMDS "s")])
+
+(define_insn "@enqcmd<enqcmd_sfx>_<mode>"
+  [(set (reg:CCZ FLAGS_REG)
+       (unspec_volatile:CCZ [(match_operand:P 0 "register_operand" "r")
+                             (match_operand:XI 1 "memory_operand" "m")]
+                            ENQCMD))]
+  "TARGET_ENQCMD"
+  "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "other")])
+
 ;; WAITPKG
 
 (define_insn "umwait"