]> git.ipfire.org Git - thirdparty/gcc.git/blobdiff - gcc/config/i386/i386.md
combine: Fix ICE in try_combine on pr112494.c [PR112560]
[thirdparty/gcc.git] / gcc / config / i386 / i386.md
index cb32de7fab87f75b35209cc430ea918be151e1a7..10ae3113ae8dbeeab7143a30a2a445338e1dbb20 100644 (file)
@@ -1,5 +1,5 @@
 ;; GCC machine description for IA-32 and x86-64.
-;; Copyright (C) 1988-2023 Free Software Foundation, Inc.
+;; Copyright (C) 1988-2024 Free Software Foundation, Inc.
 ;; Mostly by William Schelter.
 ;; x86_64 support added by Jan Hubicka
 ;;
   ;; For USER_MSR support
   UNSPECV_URDMSR
   UNSPECV_UWRMSR
+
+  ;; For AMX-TILE
+  UNSPECV_LDTILECFG
+  UNSPECV_STTILECFG
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
 ;; Processor type.
 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
                    atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1,
-                   bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
+                   bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4,
+                   znver5"
   (const (symbol_ref "ix86_schedule")))
 
 ;; A basic instruction type.  Refinements due to arguments to be
 
 ;; Main data type used by the insn
 (define_attr "mode"
-  "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
-   V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
+  "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,
+   V32HF,V16HF,V8HF,V4HF,V2HF,V32BF,V16BF,V8BF,V4BF,V2BF,
+   V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF"
   (const_string "unknown"))
 
 ;; The CPU unit operations uses.
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
 (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
-                   x64_avx,x64_avx512bw,x64_avx512dq,aes,
+                   x64_avx,x64_avx512bw,x64_avx512dq,aes,apx_ndd,
                    sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
                    avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
                    noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
           (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
         (eq_attr "isa" "vpclmulqdqvl")
           (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
+        (eq_attr "isa" "apx_ndd")
+          (symbol_ref "TARGET_APX_NDD")
 
         (eq_attr "mmx_isa" "native")
           (symbol_ref "!TARGET_MMX_WITH_SSE")
 ;; SSE and x87 SFmode and DFmode floating point modes
 (define_mode_iterator MODEF [SF DF])
 
+(define_mode_iterator MODEF248 [BF HF SF (DF "TARGET_SSE2")])
+
 ;; SSE floating point modes
 (define_mode_iterator MODEFH [(HF "TARGET_AVX512FP16") SF DF])
 
    (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
 
 ;; SSE vector suffix for floating point modes
-(define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
+;; BF and HF use the same suffix as SF for logic operations.
+(define_mode_attr ssevecmodesuffix [(BF "ps") (HF "ps") (SF "ps") (DF "pd")])
 
 ;; SSE vector mode corresponding to a scalar mode
 (define_mode_attr ssevecmode
 
 ;; AVX512F vector mode corresponding to a scalar mode
 (define_mode_attr avx512fvecmode
-  [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])
+  [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI")
+   (HF "V32HF") (BF "V32BF") (SF "V16SF") (DF "V8DF")])
 
 ;; Instruction suffix for REX 64bit operators.
 (define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
 (include "bdver3.md")
 (include "btver2.md")
 (include "znver.md")
-(include "znver4.md")
+(include "zn4zn5.md")
 (include "geode.md")
 (include "atom.md")
 (include "slm.md")
 
 (define_mode_iterator SWI1248_AVX512BWDQ_64
   [(QI "TARGET_AVX512DQ") HI
-   (SI "TARGET_AVX512BW")
-   (DI "TARGET_AVX512BW && TARGET_EVEX512 && TARGET_64BIT")])
+   (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
 
 (define_insn "*cmp<mode>_ccz_1"
   [(set (reg FLAGS_REG)
              (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
 {
   split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);
-  /* Placing the SUBREG pieces in pseudos helps reload.  */
-  for (int i = 0; i < 4; i++)
-    if (SUBREG_P (operands[i]))
-      operands[i] = force_reg (<MODE>mode, operands[i]);
 
   operands[4] = gen_reg_rtx (<MODE>mode);
 
 
 (define_insn "@pushfl<mode>2"
   [(set (match_operand:W 0 "push_operand" "=<")
-       (unspec:W [(match_operand:CC 1 "flags_reg_operand")]
+       (unspec:W [(match_operand 1 "flags_reg_operand")]
                  UNSPEC_PUSHFL))]
-  ""
+  "GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_CC"
   "pushf{<imodesuffix>}"
   [(set_attr "type" "push")
    (set_attr "mode" "<MODE>")])
    && !x86_64_immediate_operand (operands[1], DImode)
    && !x86_64_zext_immediate_operand (operands[1], DImode)
    && !((UINTVAL (operands[1]) >> ctz_hwi (UINTVAL (operands[1])))
-        & ~(HOST_WIDE_INT) 0xffffffff)
+       & ~HOST_WIDE_INT_C (0xffffffff))
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(set (match_dup 0) (match_dup 1))
    (parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
 {
   int shift = ctz_hwi (UINTVAL (operands[1]));
-  operands[1] = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
+  rtx op1 = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
+  if (ix86_endbr_immediate_operand (op1, VOIDmode))
+    FAIL;
+  operands[1] = op1;
   operands[2] = gen_int_mode (shift, QImode);
 })
 
   [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_dup 4))]
 {
-  HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~(HOST_WIDE_INT)0xff00;
+  HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~HOST_WIDE_INT_C (0xff00);
   tmp |= (INTVAL (operands[3]) & 0xff) << 8;
   operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
 })
            (eq_attr "alternative" "12")
              (const_string "x64_avx512bw")
            (eq_attr "alternative" "13")
-             (const_string "avx512bw_512")
+             (const_string "avx512bw")
           ]
           (const_string "*")))
    (set (attr "mmx_isa")
   "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
 (define_mode_attr kmov_isa
-  [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw_512")])
+  [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
 
 (define_insn "zero_extend<mode>di2"
   [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
    && optimize_insn_for_speed_p ()
    && reload_completed
    && (!EXT_REX_SSE_REG_P (operands[0])
-       || TARGET_AVX512VL || TARGET_EVEX512)"
+       || TARGET_AVX512VL)"
    [(set (match_dup 2)
         (float_extend:V2DF
           (vec_select:V2SF
       /* If it is unsafe to overwrite upper half of source, we need
         to move to destination and unpack there.  */
       if (REGNO (operands[0]) != REGNO (operands[1])
-         || (EXT_REX_SSE_REG_P (operands[1])
-             && !TARGET_AVX512VL))
+         || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
        {
          rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
          emit_move_insn (tmp, operands[1]);
        (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
                    (match_operand:SDWIM 2 "<general_hilo_operand>")))]
   ""
-  "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;")
+{
+  ix86_expand_binary_operator (PLUS, <MODE>mode, operands, TARGET_APX_NDD);
+  DONE;
+})
 
 (define_insn_and_split "*add<dwi>3_doubleword"
-  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r,&r")
        (plus:<DWI>
-         (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
-         (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+         (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,rjO,r")
+         (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r,<di>,r")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel [(set (reg:CCC FLAGS_REG)
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
   if (operands[2] == const0_rtx)
     {
+      /* Under NDD op0 and op1 may differ; a move is emitted in that case,
+        so do not emit the deleted-insn note then.  */
+      bool emit_insn_deleted_note_p = true;
+      if (!rtx_equal_p (operands[0], operands[1]))
+       {
+         emit_move_insn (operands[0], operands[1]);
+         emit_insn_deleted_note_p = false;
+       }
       if (operands[5] != const0_rtx)
-       ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3]);
+       ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3],
+                                    TARGET_APX_NDD);
       else if (!rtx_equal_p (operands[3], operands[4]))
        emit_move_insn (operands[3], operands[4]);
-      else
+      else if (emit_insn_deleted_note_p)
        emit_note (NOTE_INSN_DELETED);
       DONE;
     }
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")])
 
 (define_insn_and_split "*add<dwi>3_doubleword_zext"
-  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,&r,&r")
        (plus:<DWI>
          (zero_extend:<DWI>
-           (match_operand:DWIH 2 "nonimmediate_operand" "rm,r")) 
-         (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")))
+           (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))
+         (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,m")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
+  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel [(set (reg:CCC FLAGS_REG)
                       (match_dup 4))
                     (const_int 0)))
              (clobber (reg:CC FLAGS_REG))])]
- "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
 
 (define_insn_and_split "*add<dwi>3_doubleword_concat"
   [(set (match_operand:<DWI> 0 "register_operand" "=&r")
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (match_dup 4))
-   (set (match_dup 5) (match_dup 2))
    (parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 0) (match_dup 1))
                     (match_dup 0)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 0) (match_dup 1)))])
+   (set (match_dup 5) (match_dup 2))
    (parallel [(set (match_dup 5)
                   (plus:DWIH
                     (plus:DWIH
  "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
 
 (define_insn "*add<mode>_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
        (plus:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
-         (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le")))
+         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
+         (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
       return "#";
 
     case TYPE_INCDEC:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
-        return "inc{<imodesuffix>}\t%0";
+        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+                     : "inc{<imodesuffix>}\t%0";
       else
         {
          gcc_assert (operands[2] == constm1_rtx);
-          return "dec{<imodesuffix>}\t%0";
+         return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+                       : "dec{<imodesuffix>}\t%0";
        }
 
     default:
       if (which_alternative == 2)
         std::swap (operands[1], operands[2]);
         
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                     : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                   : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "3")
               (const_string "lea")
            (match_operand:SWI48 2 "incdec_operand")
 ;; patterns constructed from addsi_1 to match.
 
 (define_insn "addsi_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
        (zero_extend:DI
-         (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
-                  (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le"))))
+         (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,r,rm,rjM")
+                  (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le,rBMe,r,e"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return "inc{l}\t%k0";
+        return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}"
+                      : "inc{l}\t%k0";
       else
         {
          gcc_assert (operands[2] == constm1_rtx);
-          return "dec{l}\t%k0";
+         return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}"
+                        : "dec{l}\t%k0";
        }
 
     default:
         std::swap (operands[1], operands[2]);
 
       if (x86_maybe_negate_const_int (&operands[2], SImode))
-        return "sub{l}\t{%2, %k0|%k0, %2}";
+        return use_ndd ? "sub{l}\t{%2 ,%1, %k0|%k0, %1, %2}"
+                      : "sub{l}\t{%2, %k0|%k0, %2}";
 
-      return "add{l}\t{%2, %k0|%k0, %2}";
+      return use_ndd ? "add{l}\t{%2 ,%1, %k0|%k0, %1, %2}"
+                    : "add{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "2")
              (const_string "lea")
            (match_operand:SI 2 "incdec_operand")
    (set_attr "mode" "SI")])
 
 (define_insn "*addhi_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp")
-       (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp")
-                (match_operand:HI 2 "general_operand" "rn,m,0,ln")))
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
+       (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
+                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, HImode, operands)"
+  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
       return "#";
 
     case TYPE_INCDEC:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
-       return "inc{w}\t%0";
+       return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
       else
        {
          gcc_assert (operands[2] == constm1_rtx);
-         return "dec{w}\t%0";
+         return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
        }
 
     default:
       if (which_alternative == 2)
         std::swap (operands[1], operands[2]);
 
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (x86_maybe_negate_const_int (&operands[2], HImode))
-       return "sub{w}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sub{w}\t{%2, %0|%0, %2}";
 
-      return "add{w}\t{%2, %0|%0, %2}";
+      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
+                    : "add{w}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "3")
               (const_string "lea")
            (match_operand:HI 2 "incdec_operand")
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "HI,HI,HI,SI")])
+   (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
 
 (define_insn "*addqi_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
-       (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
-                (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
+       (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
+                (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, QImode, operands)"
+  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
 {
   bool widen = (get_attr_mode (insn) != MODE_QI);
-
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
       return "#";
 
     case TYPE_INCDEC:
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
-       return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+       if (use_ndd)
+         return "inc{b}\t{%1, %0|%0, %1}";
+       else
+         return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
       else
        {
          gcc_assert (operands[2] == constm1_rtx);
-         return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+         if (use_ndd)
+           return "dec{b}\t{%1, %0|%0, %1}";
+         else
+           return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
        }
 
     default:
       if (which_alternative == 2 || which_alternative == 4)
         std::swap (operands[1], operands[2]);
 
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (x86_maybe_negate_const_int (&operands[2], QImode))
        {
-         if (widen)
-           return "sub{l}\t{%2, %k0|%k0, %2}";
+         if (use_ndd)
+           return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
          else
-           return "sub{b}\t{%2, %0|%0, %2}";
+           return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
+                        : "sub{b}\t{%2, %0|%0, %2}";
        }
-      if (widen)
-        return "add{l}\t{%k2, %k0|%k0, %k2}";
+      if (use_ndd)
+       return "add{b}\t{%2, %1, %0|%0, %1, %2}";
       else
-        return "add{b}\t{%2, %0|%0, %2}";
+       return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
+                    : "add{b}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "5")
               (const_string "lea")
            (match_operand:QI 2 "incdec_operand")
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "QI,QI,QI,SI,SI,SI")
+   (set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI")
    ;; Potential partial reg stall on alternatives 3 and 4.
    (set (attr "preferred_for_speed")
      (cond [(eq_attr "alternative" "3,4")
   [(set (reg FLAGS_REG)
        (compare
          (plus:SWI
-           (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
-           (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0"))
+           (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>,rm,r")
+           (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0,r<i>,<m>"))
          (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return "inc{<imodesuffix>}\t%0";
+        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+                      : "inc{<imodesuffix>}\t%0";
       else
         {
          gcc_assert (operands[2] == constm1_rtx);
-          return "dec{<imodesuffix>}\t%0";
+         return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+                        : "dec{<imodesuffix>}\t%0";
        }
 
     default:
       if (which_alternative == 2)
         std::swap (operands[1], operands[2]);
         
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                    : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
+   (set (attr "type")
      (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
 (define_insn "*addsi_2_zext"
   [(set (reg FLAGS_REG)
        (compare
-         (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
-                  (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
+         (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")
+                  (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
          (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r,r")
+   (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (PLUS, SImode, operands)"
+   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return "inc{l}\t%k0";
+        return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}"
+                      : "inc{l}\t%k0";
       else
        {
          gcc_assert (operands[2] == constm1_rtx);
-          return "dec{l}\t%k0";
+         return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}"
+                        : "dec{l}\t%k0";
        }
 
     default:
         std::swap (operands[1], operands[2]);
 
       if (x86_maybe_negate_const_int (&operands[2], SImode))
-        return "sub{l}\t{%2, %k0|%k0, %2}";
+       return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                      : "sub{l}\t{%2, %k0|%k0, %2}";
 
-      return "add{l}\t{%2, %k0|%k0, %2}";
+      return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                    : "add{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set (attr "type")
      (if_then_else (match_operand:SI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
 (define_insn "*add<mode>_3"
   [(set (reg FLAGS_REG)
        (compare
-         (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0"))
-         (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")))
-   (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
+         (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>,re"))
+         (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r,rm")))
+   (clobber (match_scratch:SWI 0 "=<r>,<r>,r,r"))]
   "ix86_match_ccmode (insn, CCZmode)
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return "inc{<imodesuffix>}\t%0";
+        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+                      : "inc{<imodesuffix>}\t%0";
       else
         {
          gcc_assert (operands[2] == constm1_rtx);
-          return "dec{<imodesuffix>}\t%0";
+          return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+                        : "dec{<imodesuffix>}\t%0";
        }
 
     default:
       if (which_alternative == 1)
         std::swap (operands[1], operands[2]);
 
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                       : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                     : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set (attr "type")
      (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
 (define_insn "*addsi_3_zext"
   [(set (reg FLAGS_REG)
        (compare
-         (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
-         (match_operand:SI 1 "nonimmediate_operand" "%0,r")))
-   (set (match_operand:DI 0 "register_operand" "=r,r")
+         (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
+         (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")))
+   (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
-   && ix86_binary_operator_ok (PLUS, SImode, operands)"
+   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return "inc{l}\t%k0";
+        return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" : "inc{l}\t%k0";
       else
         {
          gcc_assert (operands[2] == constm1_rtx);
-          return "dec{l}\t%k0";
+         return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" : "dec{l}\t%k0";
        }
 
     default:
         std::swap (operands[1], operands[2]);
 
       if (x86_maybe_negate_const_int (&operands[2], SImode))
-        return "sub{l}\t{%2, %k0|%k0, %2}";
+        return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                      : "sub{l}\t{%2, %k0|%k0, %2}";
 
-      return "add{l}\t{%2, %k0|%k0, %2}";
+      return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                    : "add{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set (attr "type")
      (if_then_else (match_operand:SI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
 (define_insn "*adddi_4"
   [(set (reg FLAGS_REG)
        (compare
-         (match_operand:DI 1 "nonimmediate_operand" "0")
-         (match_operand:DI 2 "x86_64_immediate_operand" "e")))
-   (clobber (match_scratch:DI 0 "=r"))]
+         (match_operand:DI 1 "nonimmediate_operand" "0,rm")
+         (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
+   (clobber (match_scratch:DI 0 "=r,r"))]
   "TARGET_64BIT
    && ix86_match_ccmode (insn, CCGCmode)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == constm1_rtx)
-        return "inc{q}\t%0";
+        return use_ndd ? "inc{q}\t{%1, %0|%0, %1}" : "inc{q}\t%0";
       else
         {
          gcc_assert (operands[2] == const1_rtx);
-          return "dec{q}\t%0";
+         return use_ndd ? "dec{q}\t{%1, %0|%0, %1}" : "dec{q}\t%0";
        }
 
     default:
       if (x86_maybe_negate_const_int (&operands[2], DImode))
-       return "add{q}\t{%2, %0|%0, %2}";
+       return use_ndd ? "add{q}\t{%2, %1, %0|%0, %1, %2}"
+                      : "add{q}\t{%2, %0|%0, %2}";
 
-      return "sub{q}\t{%2, %0|%0, %2}";
+      return use_ndd ? "sub{q}\t{%2, %1, %0|%0, %1, %2}"
+                    : "sub{q}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,apx_ndd")
+   (set (attr "type")
      (if_then_else (match_operand:DI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
 (define_insn "*add<mode>_4"
   [(set (reg FLAGS_REG)
        (compare
-         (match_operand:SWI124 1 "nonimmediate_operand" "0")
+         (match_operand:SWI124 1 "nonimmediate_operand" "0,rm")
          (match_operand:SWI124 2 "const_int_operand")))
-   (clobber (match_scratch:SWI124 0 "=<r>"))]
+   (clobber (match_scratch:SWI124 0 "=<r>,r"))]
   "ix86_match_ccmode (insn, CCGCmode)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == constm1_rtx)
-        return "inc{<imodesuffix>}\t%0";
+        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+                      : "inc{<imodesuffix>}\t%0";
       else
         {
          gcc_assert (operands[2] == const1_rtx);
-          return "dec{<imodesuffix>}\t%0";
+         return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+                        : "dec{<imodesuffix>}\t%0";
        }
 
     default:
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-       return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+      return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                    : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,apx_ndd")
+   (set (attr "type")
      (if_then_else (match_operand:<MODE> 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   [(set (reg FLAGS_REG)
        (compare
          (plus:SWI
-           (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")
-           (match_operand:SWI 2 "<general_operand>" "<g>,0"))
+           (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r,rm")
+           (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>,re"))
          (const_int 0)))
-   (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
+   (clobber (match_scratch:SWI 0 "=<r>,<r>,r,r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return "inc{<imodesuffix>}\t%0";
+        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+                      : "inc{<imodesuffix>}\t%0";
       else
         {
           gcc_assert (operands[2] == constm1_rtx);
-          return "dec{<imodesuffix>}\t%0";
+         return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+                        : "dec{<imodesuffix>}\t%0";
        }
 
     default:
       if (which_alternative == 1)
         std::swap (operands[1], operands[2]);
 
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                    : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set (attr "type")
      (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO (plus:<DWI>
                   (sign_extend:<DWI>
-                     (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
+                     (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
                   (sign_extend:<DWI>
-                     (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
+                     (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
                (sign_extend:<DWI>
                   (plus:SWI (match_dup 1) (match_dup 2)))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
-  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   add{<imodesuffix>}\t{%2, %0|%0, %2}
+   add{<imodesuffix>}\t{%2, %0|%0, %2}
+   add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "addv<mode>4_1"
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO (plus:<DWI>
                   (sign_extend:<DWI>
-                     (match_operand:SWI 1 "nonimmediate_operand" "0"))
+                     (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
                   (match_operand:<DWI> 3 "const_int_operand"))
                (sign_extend:<DWI>
                   (plus:SWI
                     (match_dup 1)
-                    (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+                    (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
    && CONST_INT_P (operands[2])
    && INTVAL (operands[2]) == INTVAL (operands[3])"
-  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+  add{<imodesuffix>}\t{%2, %0|%0, %2}
+  add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")
    (set (attr "length_immediate")
        (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
        (eq:CCO
          (plus:<QPWI>
            (sign_extend:<QPWI>
-             (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0"))
+             (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r"))
            (sign_extend:<QPWI>
-             (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
+             (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
          (sign_extend:<QPWI>
            (plus:<DWI> (match_dup 1) (match_dup 2)))))
-   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
        (plus:<DWI> (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel [(set (reg:CCC FLAGS_REG)
                     (match_dup 5)))])]
 {
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
 
 (define_insn_and_split "*addv<dwi>4_doubleword_1"
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (plus:<QPWI>
            (sign_extend:<QPWI>
-             (match_operand:<DWI> 1 "nonimmediate_operand" "%0"))
-           (match_operand:<QPWI> 3 "const_scalar_int_operand" "n"))
+             (match_operand:<DWI> 1 "nonimmediate_operand" "%0,rjM"))
+           (match_operand:<QPWI> 3 "const_scalar_int_operand" "n,n"))
          (sign_extend:<QPWI>
            (plus:<DWI>
              (match_dup 1)
-             (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
-   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+             (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
+   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
        (plus:<DWI> (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)
+  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)
    && CONST_SCALAR_INT_P (operands[2])
    && rtx_equal_p (operands[2], operands[3])"
   "#"
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
   if (operands[2] == const0_rtx)
     {
+      if (!rtx_equal_p (operands[0], operands[1]))
+       emit_move_insn (operands[0], operands[1]);
       emit_insn (gen_addv<mode>4_1 (operands[3], operands[4], operands[5],
                                    operands[5]));
       DONE;
     }
-})
+}
+[(set_attr "isa" "*,apx_ndd")])
 
 (define_insn "*addv<mode>4_overflow_1"
   [(set (reg:CCO FLAGS_REG)
              (match_operator:<DWI> 4 "ix86_carry_flag_operator"
                [(match_operand 3 "flags_reg_operand") (const_int 0)])
              (sign_extend:<DWI>
-               (match_operand:SWI 1 "nonimmediate_operand" "%0,0")))
+               (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")))
            (sign_extend:<DWI>
-             (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
+             (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
          (sign_extend:<DWI>
            (plus:SWI
              (plus:SWI
                  [(match_dup 3) (const_int 0)])
                (match_dup 1))
              (match_dup 2)))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
        (plus:SWI
          (plus:SWI
            (match_op_dup 5 [(match_dup 3) (const_int 0)])
            (match_dup 1))
          (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
-  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   adc{<imodesuffix>}\t{%2, %0|%0, %2}
+   adc{<imodesuffix>}\t{%2, %0|%0, %2}
+   adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*addv<mode>4_overflow_2"
              (match_operator:<DWI> 4 "ix86_carry_flag_operator"
                [(match_operand 3 "flags_reg_operand") (const_int 0)])
              (sign_extend:<DWI>
-               (match_operand:SWI 1 "nonimmediate_operand" "%0")))
-           (match_operand:<DWI> 6 "const_int_operand" "n"))
+               (match_operand:SWI 1 "nonimmediate_operand" "%0,rm")))
+           (match_operand:<DWI> 6 "const_int_operand" "n,n"))
          (sign_extend:<DWI>
            (plus:SWI
              (plus:SWI
                (match_operator:SWI 5 "ix86_carry_flag_operator"
                  [(match_dup 3) (const_int 0)])
                (match_dup 1))
-             (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
+             (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
        (plus:SWI
          (plus:SWI
            (match_op_dup 5 [(match_dup 3) (const_int 0)])
            (match_dup 1))
          (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
    && CONST_INT_P (operands[2])
    && INTVAL (operands[2]) == INTVAL (operands[6])"
-  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+  adc{<imodesuffix>}\t{%2, %0|%0, %2}
+  adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")
    (set (attr "length_immediate")
      (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
        (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
                     (match_operand:SDWIM 2 "<general_hilo_operand>")))]
   ""
-  "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;")
+{
+  ix86_expand_binary_operator (MINUS, <MODE>mode, operands, TARGET_APX_NDD);
+  DONE;
+})
 
 (define_insn_and_split "*sub<dwi>3_doubleword"
-  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
        (minus:<DWI>
-         (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
-         (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+         (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r")
+         (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG)
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
   if (operands[2] == const0_rtx)
     {
-      ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3]);
+      if (!rtx_equal_p (operands[0], operands[1]))
+       emit_move_insn (operands[0], operands[1]);
+      ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3],
+                                  TARGET_APX_NDD);
       DONE;
     }
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
 
 (define_insn_and_split "*sub<dwi>3_doubleword_zext"
-  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,&r,&r")
        (minus:<DWI>
-         (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
+         (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,o")
          (zero_extend:<DWI>
-           (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))))
+           (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
+  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG)
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
                     (const_int 0)))
              (clobber (reg:CC FLAGS_REG))])]
-  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
 
 ;; Subtract operand 2 from operand 1 in an SWI mode, store the result in
 ;; operand 0 and clobber FLAGS_REG.  Alternatives 0-1 tie operand 1 to
 ;; operand 0 (constraint "0") and print only %2 and %0.  The three
 ;; alternatives added on the "+" side are enabled through the "isa"
 ;; attribute value apx_ndd; their template prints operand 1 explicitly, so
 ;; the destination register does not have to match operand 1.  The insn
 ;; condition passes TARGET_APX_NDD on to ix86_binary_operator_ok.
 (define_insn "*sub<mode>_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r,r")
         (minus:SWI
-         (match_operand:SWI 1 "nonimmediate_operand" "0,0")
-         (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
+         (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,rjM,r")
+         (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r,<i>,<m>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 ;; SImode subtraction (operand 1 minus operand 2) whose result is
 ;; zero-extended into the DImode register operand 0; FLAGS_REG is
 ;; clobbered.  Only valid when TARGET_64BIT.  Alternative 0 ties operand 1
 ;; to operand 0; alternatives 1-2 (isa apx_ndd) print operand 1 as a
 ;; separate source.  The "+" side also widens the predicate of operand 1
 ;; from register_operand to nonimmediate_operand.  %k0 is used to print
 ;; operand 0 in every template.
 (define_insn "*subsi_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
         (zero_extend:DI
-         (minus:SI (match_operand:SI 1 "register_operand" "0")
-                   (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+         (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
+                   (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
-  "sub{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "alu")
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
+  "@
+  sub{l}\t{%2, %k0|%k0, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
   [(set (reg FLAGS_REG)
        (compare
          (minus:SWI
-           (match_operand:SWI 1 "nonimmediate_operand" "0,0")
-           (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+           (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
+           (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
          (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
        (minus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 ;; SImode subtraction that both sets FLAGS_REG (comparison of the
 ;; difference against zero) and stores the zero-extended difference in the
 ;; DImode register operand 0.  Requires TARGET_64BIT and
 ;; ix86_match_ccmode (insn, CCGOCmode).  Alternative 0 ties operand 1 to
 ;; operand 0; alternatives 1-2 (isa apx_ndd) print operand 1 as a separate
 ;; source, and operand 1 may then be any nonimmediate_operand.
 (define_insn "*subsi_2_zext"
   [(set (reg FLAGS_REG)
         (compare
-         (minus:SI (match_operand:SI 1 "register_operand" "0")
-                   (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+         (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
+                   (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
           (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r,r")
         (zero_extend:DI
           (minus:SI (match_dup 1)
                     (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (MINUS, SImode, operands)"
-  "sub{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "alu")
+   && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
+  "@
+  sub{l}\t{%2, %k0|%k0, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
 (define_insn "*subqi_ext<mode>_0"
               (pc)))]
   ""
 {
-  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
+  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
+                                     TARGET_APX_NDD);
   if (CONST_SCALAR_INT_P (operands[2]))
     operands[4] = operands[2];
   else
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO (minus:<DWI>
                   (sign_extend:<DWI>
-                     (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
+                     (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
                   (sign_extend:<DWI>
-                     (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
+                     (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
                (sign_extend:<DWI>
                   (minus:SWI (match_dup 1) (match_dup 2)))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
        (minus:SWI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "subv<mode>4_1"
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO (minus:<DWI>
                   (sign_extend:<DWI>
-                     (match_operand:SWI 1 "nonimmediate_operand" "0"))
+                     (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
                   (match_operand:<DWI> 3 "const_int_operand"))
                (sign_extend:<DWI>
                   (minus:SWI
                     (match_dup 1)
-                    (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+                    (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (minus:SWI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
    && CONST_INT_P (operands[2])
    && INTVAL (operands[2]) == INTVAL (operands[3])"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")
    (set (attr "length_immediate")
        (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
        (eq:CCO
          (minus:<QPWI>
            (sign_extend:<QPWI>
-             (match_operand:<DWI> 1 "nonimmediate_operand" "0,0"))
+             (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r"))
            (sign_extend:<QPWI>
-             (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
+             (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
          (sign_extend:<QPWI>
            (minus:<DWI> (match_dup 1) (match_dup 2)))))
-   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
        (minus:<DWI> (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG)
                     (match_dup 5)))])]
 {
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
 
 (define_insn_and_split "*subv<dwi>4_doubleword_1"
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO
          (minus:<QPWI>
            (sign_extend:<QPWI>
-             (match_operand:<DWI> 1 "nonimmediate_operand" "0"))
+             (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro"))
            (match_operand:<QPWI> 3 "const_scalar_int_operand"))
          (sign_extend:<QPWI>
            (minus:<DWI>
              (match_dup 1)
-             (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
-   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+             (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
+   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
        (minus:<DWI> (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
    && CONST_SCALAR_INT_P (operands[2])
    && rtx_equal_p (operands[2], operands[3])"
   "#"
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
   if (operands[2] == const0_rtx)
     {
+      if (!rtx_equal_p (operands[0], operands[1]))
+       emit_move_insn (operands[0], operands[1]);
       emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
                                    operands[5]));
       DONE;
     }
-})
+}
+[(set_attr "isa" "*,apx_ndd")])
 
 (define_insn "*subv<mode>4_overflow_1"
   [(set (reg:CCO FLAGS_REG)
          (minus:<DWI>
            (minus:<DWI>
              (sign_extend:<DWI>
-               (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
+               (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
              (match_operator:<DWI> 4 "ix86_carry_flag_operator"
                [(match_operand 3 "flags_reg_operand") (const_int 0)]))
            (sign_extend:<DWI>
-             (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
+             (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
          (sign_extend:<DWI>
            (minus:SWI
              (minus:SWI
                (match_operator:SWI 5 "ix86_carry_flag_operator"
                  [(match_dup 3) (const_int 0)]))
              (match_dup 2)))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
        (minus:SWI
          (minus:SWI
            (match_dup 1)
            (match_op_dup 5 [(match_dup 3) (const_int 0)]))
          (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+  sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+  sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*subv<mode>4_overflow_2"
          (minus:<DWI>
            (minus:<DWI>
              (sign_extend:<DWI>
-               (match_operand:SWI 1 "nonimmediate_operand" "%0"))
+               (match_operand:SWI 1 "nonimmediate_operand" "%0,rm"))
              (match_operator:<DWI> 4 "ix86_carry_flag_operator"
                [(match_operand 3 "flags_reg_operand") (const_int 0)]))
-           (match_operand:<DWI> 6 "const_int_operand" "n"))
+           (match_operand:<DWI> 6 "const_int_operand" "n,n"))
          (sign_extend:<DWI>
            (minus:SWI
              (minus:SWI
                (match_dup 1)
                (match_operator:SWI 5 "ix86_carry_flag_operator"
                  [(match_dup 3) (const_int 0)]))
-             (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
+             (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
        (minus:SWI
          (minus:SWI
            (match_dup 1)
            (match_op_dup 5 [(match_dup 3) (const_int 0)]))
          (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
    && CONST_INT_P (operands[2])
    && INTVAL (operands[2]) == INTVAL (operands[6])"
-  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+  sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+  sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")
    (set (attr "length_immediate")
      (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
               (label_ref (match_operand 3))
               (pc)))]
   ""
-  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
+  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
+                                      TARGET_APX_NDD);")
 
 (define_insn "*sub<mode>_3"
   [(set (reg FLAGS_REG)
-       (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
-                (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+       (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
+                (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>i,r,r")
        (minus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCmode)
-   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_peephole2
 
 ;; SImode subtraction that sets FLAGS_REG from a direct comparison of
 ;; operand 1 with operand 2 and stores the zero-extended difference in the
 ;; DImode register operand 0.  Requires TARGET_64BIT and
 ;; ix86_match_ccmode (insn, CCmode).  Alternative 0 prints %1 as the
 ;; destination; operand 1 is tied to operand 0 there by the "0"
 ;; constraint.  Alternatives 1-2 (isa apx_ndd) print %1 as a source and
 ;; %k0 as the destination.
 (define_insn "*subsi_3_zext"
   [(set (reg FLAGS_REG)
-       (compare (match_operand:SI 1 "register_operand" "0")
-                (match_operand:SI 2 "x86_64_general_operand" "rBMe")))
-   (set (match_operand:DI 0 "register_operand" "=r")
+       (compare (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
+                (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))
+   (set (match_operand:DI 0 "register_operand" "=r,r,r")
         (zero_extend:DI
           (minus:SI (match_dup 1)
                     (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
-   && ix86_binary_operator_ok (MINUS, SImode, operands)"
-  "sub{l}\t{%2, %1|%1, %2}"
-  [(set_attr "type" "alu")
+   && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
+  "@
+  sub{l}\t{%2, %1|%1, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "SI")])
 \f
 ;; Add with carry and subtract with borrow
 
 ;; Add operand 1, operand 2 and the value of the carry-flag operator
 ;; (operand 4, applied to flags operand 3 and const 0) in an SWI mode,
 ;; store the sum in operand 0 and clobber FLAGS_REG.  Emitted as adc.
 ;; Alternatives 0-1 tie operand 1 to operand 0; alternatives 2-3
 ;; (isa apx_ndd) print operand 1 as a separate source.
 (define_insn "@add<mode>3_carry"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
         (plus:SWI
           (plus:SWI
             (match_operator:SWI 4 "ix86_carry_flag_operator"
              [(match_operand 3 "flags_reg_operand") (const_int 0)])
-           (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
-         (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
+           (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
+         (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
-  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   adc{<imodesuffix>}\t{%2, %0|%0, %2}
+   adc{<imodesuffix>}\t{%2, %0|%0, %2}
+   adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
    (set_attr "mode" "<MODE>")])
 
 ;; SImode add-with-carry (carry-flag operator 3 on FLAGS_REG, plus
 ;; operand 1, plus operand 2) whose result is zero-extended into the
 ;; DImode register operand 0; FLAGS_REG is clobbered.  Only valid when
 ;; TARGET_64BIT.  Alternative 0 ties operand 1 to operand 0; alternatives
 ;; 1-2 (isa apx_ndd) print operand 1 as a separate source, and operand 1
 ;; may then be any nonimmediate_operand.
 (define_insn "*addsi3_carry_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
         (zero_extend:DI
           (plus:SI
             (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
                       [(reg FLAGS_REG) (const_int 0)])
-                    (match_operand:SI 1 "register_operand" "%0"))
-           (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+                    (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm"))
+           (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
-  "adc{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "alu")
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
+  "@
+  adc{l}\t{%2, %k0|%k0, %2}
+  adc{l}\t{%2, %1, %k0|%k0, %1, %2}
+  adc{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
 ;; SImode addition of the carry-flag operator (operand 2 on FLAGS_REG) to
 ;; operand 1 with no second addend, zero-extended into the DImode register
 ;; operand 0; FLAGS_REG is clobbered.  Only valid when TARGET_64BIT.
 ;; Emitted as adc with an immediate 0.  Alternative 0 ties operand 1 to
 ;; operand 0; alternative 1 (isa apx_ndd) prints operand 1 as a separate
 ;; source.
 (define_insn "*addsi3_carry_zext_0"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
         (zero_extend:DI
           (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
                     [(reg FLAGS_REG) (const_int 0)])
-                  (match_operand:SI 1 "register_operand" "0"))))
+                  (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "adc{l}\t{$0, %k0|%k0, 0}"
-  [(set_attr "type" "alu")
+  "@
+  adc{l}\t{$0, %k0|%k0, 0}
+  adc{l}\t{$0, %1, %k0|%k0, %1, 0}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
 ;; Variant of the carry-only SImode addition that matches
 ;; ix86_carry_flag_unset_operator (operand 2 on FLAGS_REG) instead of the
 ;; plain carry-flag operator; the result is zero-extended into the DImode
 ;; register operand 0 and FLAGS_REG is clobbered.  Only valid when
 ;; TARGET_64BIT.  Emitted as sbb with an immediate -1.
 ;; NOTE(review): presumably this works because x + !CF == x - (-1) - CF;
 ;; confirm against the definition of ix86_carry_flag_unset_operator.
 ;; Alternative 0 ties operand 1 to operand 0; alternative 1 (isa apx_ndd)
 ;; prints operand 1 as a separate source.
 (define_insn "*addsi3_carry_zext_0r"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
         (zero_extend:DI
           (plus:SI (match_operator:SI 2 "ix86_carry_flag_unset_operator"
                     [(reg FLAGS_REG) (const_int 0)])
-                  (match_operand:SI 1 "register_operand" "0"))))
+                  (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "sbb{l}\t{$-1, %k0|%k0, -1}"
-  [(set_attr "type" "alu")
+  "@
+  sbb{l}\t{$-1, %k0|%k0, -1}
+  sbb{l}\t{$-1, %1, %k0|%k0, %1, -1}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
              (plus:SWI48
                (match_operator:SWI48 5 "ix86_carry_flag_operator"
                  [(match_operand 3 "flags_reg_operand") (const_int 0)])
-               (match_operand:SWI48 1 "nonimmediate_operand" "%0,0"))
-             (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))
+               (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,rm,r"))
+             (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,r,m")))
          (plus:<DWI>
            (zero_extend:<DWI> (match_dup 2))
            (match_operator:<DWI> 4 "ix86_carry_flag_operator"
              [(match_dup 3) (const_int 0)]))))
-   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
        (plus:SWI48 (plus:SWI48 (match_op_dup 5
                                 [(match_dup 3) (const_int 0)])
                                (match_dup 1))
                    (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
-  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  adc{<imodesuffix>}\t{%2, %0|%0, %2}
+  adc{<imodesuffix>}\t{%2, %0|%0, %2}
+  adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
             (match_dup 1)))
       (set (match_operand:SWI48 0 "nonimmediate_operand")
           (plus:SWI48 (match_dup 1) (match_dup 2)))])]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)")
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)")
 
 (define_insn "*addcarry<mode>_1"
   [(set (reg:CCC FLAGS_REG)
              (plus:SWI48
                (match_operator:SWI48 5 "ix86_carry_flag_operator"
                  [(match_operand 3 "flags_reg_operand") (const_int 0)])
-               (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
-             (match_operand:SWI48 2 "x86_64_immediate_operand" "e")))
+               (match_operand:SWI48 1 "nonimmediate_operand" "%0,rm"))
+             (match_operand:SWI48 2 "x86_64_immediate_operand" "e,e")))
          (plus:<DWI>
            (match_operand:<DWI> 6 "const_scalar_int_operand")
            (match_operator:<DWI> 4 "ix86_carry_flag_operator"
              [(match_dup 3) (const_int 0)]))))
-   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
        (plus:SWI48 (plus:SWI48 (match_op_dup 5
                                 [(match_dup 3) (const_int 0)])
                                (match_dup 1))
                    (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
    && CONST_INT_P (operands[2])
    /* Check that operands[6] is operands[2] zero extended from
       <MODE>mode to <DWI>mode.  */
          && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0)
              == UINTVAL (operands[2]))
          && CONST_WIDE_INT_ELT (operands[6], 1) == 0))"
-  "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+  adc{<imodesuffix>}\t{%2, %0|%0, %2}
+  adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")
        (const_string "4")))])
 
 (define_insn "@sub<mode>3_carry"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
        (minus:SWI
          (minus:SWI
-           (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+           (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
            (match_operator:SWI 4 "ix86_carry_flag_operator"
             [(match_operand 3 "flags_reg_operand") (const_int 0)]))
-         (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
+         (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+  sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+  sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*subsi3_carry_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI
          (minus:SI
            (minus:SI
-             (match_operand:SI 1 "register_operand" "0")
+             (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
              (match_operator:SI 3 "ix86_carry_flag_operator"
               [(reg FLAGS_REG) (const_int 0)]))
-           (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+           (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
-  "sbb{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "alu")
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)"
+  "@
+  sbb{l}\t{%2, %k0|%k0, %2}
+  sbb{l}\t{%2, %1, %k0|%k0, %1, %2}
+  sbb{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extend:<DWI>
-           (match_operand:SWI48 1 "nonimmediate_operand" "0,0"))
+           (match_operand:SWI48 1 "nonimmediate_operand" "0,0,r,rm"))
          (plus:<DWI>
            (match_operator:<DWI> 4 "ix86_carry_flag_operator"
              [(match_operand 3 "flags_reg_operand") (const_int 0)])
            (zero_extend:<DWI>
-             (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))))
-   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+             (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,rm,r")))))
+   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
        (minus:SWI48 (minus:SWI48
                       (match_dup 1)
                       (match_operator:SWI48 5 "ix86_carry_flag_operator"
                         [(match_dup 3) (const_int 0)]))
                     (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+  sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+  sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "use_carry" "1")
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "<MODE>")])
             (match_operand:SWI48 2 "<general_operand>")))
       (set (match_operand:SWI48 0 "register_operand")
           (minus:SWI48 (match_dup 1) (match_dup 2)))])]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)")
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)")
 
 (define_expand "uaddc<mode>5"
   [(match_operand:SWI48 0 "register_operand")
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SWI
-           (match_operand:SWI 1 "nonimmediate_operand" "%0")
-           (match_operand:SWI 2 "<general_operand>" "<g>"))
+           (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
+           (match_operand:SWI 2 "<general_operand>" "<g>,<g>,re"))
          (match_dup 1)))
-   (clobber (match_scratch:SWI 0 "=<r>"))]
+   (clobber (match_scratch:SWI 0 "=<r>,r,r"))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+  add{<imodesuffix>}\t{%2, %0|%0, %2}
+  add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "@add<mode>3_cc_overflow_1"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
            (plus:SWI
-               (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
-               (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+               (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,rjM,r")
+               (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r,<i>,<m>"))
            (match_dup 1)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
-  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   add{<imodesuffix>}\t{%2, %0|%0, %2}
+   add{<imodesuffix>}\t{%2, %0|%0, %2}
+   add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_peephole2
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SI
-           (match_operand:SI 1 "nonimmediate_operand" "%0")
-           (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+           (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
+           (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
          (match_dup 1)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
-  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
-  "add{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "alu")
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
+  "@
+  add{l}\t{%2, %k0|%k0, %2}
+  add{l}\t{%2, %1, %k0|%k0, %1, %2}
+  add{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
 (define_insn "*add<mode>3_cconly_overflow_2"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SWI
-           (match_operand:SWI 1 "nonimmediate_operand" "%0")
-           (match_operand:SWI 2 "<general_operand>" "<g>"))
+           (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
+           (match_operand:SWI 2 "<general_operand>" "<g>,<g>,re"))
          (match_dup 2)))
-   (clobber (match_scratch:SWI 0 "=<r>"))]
+   (clobber (match_scratch:SWI 0 "=<r>,r,r"))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+  add{<imodesuffix>}\t{%2, %0|%0, %2}
+  add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*add<mode>3_cc_overflow_2"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
            (plus:SWI
-               (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
-               (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+               (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
+               (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
            (match_dup 2)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
-  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  add{<imodesuffix>}\t{%2, %0|%0, %2}
+  add{<imodesuffix>}\t{%2, %0|%0, %2}
+  add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*addsi3_zext_cc_overflow_2"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SI
-           (match_operand:SI 1 "nonimmediate_operand" "%0")
-           (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+           (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
+           (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
          (match_dup 2)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
-  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
-  "add{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "alu")
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
+  "@
+  add{l}\t{%2, %k0|%k0, %2}
+  add{l}\t{%2, %1, %k0|%k0, %1, %2}
+  add{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
 (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:<DWI>
-           (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
-           (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o"))
+           (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,rjO,r")
+           (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r,<di>,o"))
          (match_dup 1)))
-   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r,&r")
        (plus:<DWI> (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel [(set (reg:CCC FLAGS_REG)
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
   if (operands[2] == const0_rtx)
     {
+      if (!rtx_equal_p (operands[0], operands[1]))
+       emit_move_insn (operands[0], operands[1]);
       emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5]));
       DONE;
     }
                                            operands[5], <MODE>mode);
   else
     operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]);
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")])
 
 ;; x == 0 with zero flag test can be done also as x < 1U with carry flag
 ;; test, where the latter is preferable if we have some carry consuming
            (match_operand:SWI 1 "nonimmediate_operand"))
          (match_operand:SWI 2 "<general_operand>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
   "CONST_INT_P (operands[2])
    && (<MODE>mode != DImode
        || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
-   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
                    (const_int 0)))
          (match_operand:SWI 2 "<general_operand>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
   "CONST_INT_P (operands[2])
    && (<MODE>mode != DImode
        || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
-   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
   "CONST_INT_P (operands[2])
    && (<MODE>mode != DImode
        || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
-   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
               (operands[0], gen_lowpart (mode, operands[1]),
                <MODE>mode, mode, 1));
   else
-    ix86_expand_binary_operator (AND, <MODE>mode, operands);
+    ix86_expand_binary_operator (AND, <MODE>mode, operands, TARGET_APX_NDD);
 
   DONE;
 })
 
 (define_insn_and_split "*and<dwi>3_doubleword"
-  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r,&r")
        (and:<DWI>
-        (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
-        (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+        (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,rjO,r")
+        (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r,<di>,o")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (AND, <DWI>mode, operands)"
+  "ix86_binary_operator_ok (AND, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(const_int:DWIH 0)]
   if (operands[2] == const0_rtx)
     emit_move_insn (operands[0], const0_rtx);
   else if (operands[2] == constm1_rtx)
-    emit_insn_deleted_note_p = true;
+    {
+      if (!rtx_equal_p (operands[0], operands[1]))
+       emit_move_insn (operands[0], operands[1]);
+      else
+       emit_insn_deleted_note_p = true;
+    }
   else
-    ix86_expand_binary_operator (AND, <MODE>mode, &operands[0]);
+    ix86_expand_binary_operator (AND, <MODE>mode, &operands[0], TARGET_APX_NDD);
 
   if (operands[5] == const0_rtx)
     emit_move_insn (operands[3], const0_rtx);
   else if (operands[5] == constm1_rtx)
     {
-      if (emit_insn_deleted_note_p)
+      if (!rtx_equal_p (operands[3], operands[4]))
+       emit_move_insn (operands[3], operands[4]);
+      else if (emit_insn_deleted_note_p)
        emit_note (NOTE_INSN_DELETED);
     }
   else
-    ix86_expand_binary_operator (AND, <MODE>mode, &operands[3]);
+    ix86_expand_binary_operator (AND, <MODE>mode, &operands[3], TARGET_APX_NDD);
 
   DONE;
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")])
 
 (define_insn "*anddi_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,r,?k")
        (and:DI
-        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
-        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
+        (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,rjM,r,qm,k")
+        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,r,e,m,L,k")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
   "@
    and{l}\t{%k2, %k0|%k0, %k2}
+   and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
    and{q}\t{%2, %0|%0, %2}
    and{q}\t{%2, %0|%0, %2}
+   and{q}\t{%2, %1, %0|%0, %1, %2}
+   and{q}\t{%2, %1, %0|%0, %1, %2}
+   and{q}\t{%2, %1, %0|%0, %1, %2}
    #
    #"
-  [(set_attr "isa" "x64,x64,x64,x64,avx512bw_512")
-   (set_attr "type" "alu,alu,alu,imovx,msklog")
-   (set_attr "length_immediate" "*,*,*,0,*")
+  [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,apx_ndd,x64,avx512bw")
+   (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,imovx,msklog")
+   (set_attr "length_immediate" "*,*,*,*,*,*,*,0,*")
    (set (attr "prefix_rex")
      (if_then_else
        (and (eq_attr "type" "imovx")
                 (match_operand 1 "ext_QIreg_operand")))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "SI,DI,DI,SI,DI")])
+   (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,SI,DI")])
 
 (define_insn_and_split "*anddi_1_btr"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 
 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand
 (define_insn "*andsi_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI
-         (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+         (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,rjM,r")
+                 (match_operand:SI 2 "x86_64_general_operand" "rBMe,r,e,BM"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
-  "and{l}\t{%2, %k0|%k0, %2}"
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
+  "@
+  and{l}\t{%2, %k0|%k0, %2}
+  and{l}\t{%2, %1, %k0|%k0, %1, %2}
+  and{l}\t{%2, %1, %k0|%k0, %1, %2}
+  and{l}\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
    (set_attr "mode" "SI")])
 
 (define_insn "*and<mode>_1"
-  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k")
-       (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
-                  (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,L,k")))
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,r,Ya,?k")
+       (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,rjM,r,qm,k")
+                  (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,r,<i>,<m>,L,k")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
   "@
    and{<imodesuffix>}\t{%2, %0|%0, %2}
    and{<imodesuffix>}\t{%2, %0|%0, %2}
+   and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
    #
    #"
   [(set (attr "isa")
-       (cond [(eq_attr "alternative" "3")
+       (cond [(eq_attr "alternative" "2,3,4")
+                (const_string "apx_ndd")
+              (eq_attr "alternative" "6")
                 (if_then_else (eq_attr "mode" "SI")
                   (const_string "avx512bw")
                   (const_string "avx512f"))
              ]
              (const_string "*")))
-   (set_attr "type" "alu,alu,imovx,msklog")
-   (set_attr "length_immediate" "*,*,0,*")
+   (set_attr "type" "alu,alu,alu,alu,alu,imovx,msklog")
+   (set_attr "length_immediate" "*,*,*,*,*,0,*")
    (set (attr "prefix_rex")
      (if_then_else
        (and (eq_attr "type" "imovx")
                 (match_operand 1 "ext_QIreg_operand")))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
+   (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
 
 (define_insn "*andqi_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
-       (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
-               (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+       (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+               (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (AND, QImode, operands)"
+  "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)"
   "@
    and{b}\t{%2, %0|%0, %2}
    and{b}\t{%2, %0|%0, %2}
    and{l}\t{%k2, %k0|%k0, %k2}
+   and{b}\t{%2, %1, %0|%0, %1, %2}
+   and{b}\t{%2, %1, %0|%0, %1, %2}
    #"
-  [(set_attr "type" "alu,alu,alu,msklog")
+  [(set_attr "type" "alu,alu,alu,alu,alu,msklog")
+   (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
    (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (const_string "SI")
-               (and (eq_attr "alternative" "3")
+               (and (eq_attr "alternative" "5")
                     (match_test "!TARGET_AVX512DQ"))
                 (const_string "HI")
               ]
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))"
+       || REGNO (operands[0]) != REGNO (operands[1]))
+   && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)
+       || UINTVAL (operands[2]) == GET_MODE_MASK (HImode)
+       || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))"
   [(const_int 0)]
 {
   unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
   [(set (reg FLAGS_REG)
        (compare
         (and:DI
-         (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
-         (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m"))
+         (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r")
+         (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m"))
         (const_int 0)))
-   (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+   (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r")
        (and:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT
    && ix86_match_ccmode
          && (!CONST_INT_P (operands[2])
              || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
         ? CCZmode : CCNOmode)
-   && ix86_binary_operator_ok (AND, DImode, operands)"
+   && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
   "@
    and{l}\t{%k2, %k0|%k0, %k2}
    and{q}\t{%2, %0|%0, %2}
-   and{q}\t{%2, %0|%0, %2}"
+   and{q}\t{%2, %0|%0, %2}
+   and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
+   and{q}\t{%2, %1, %0|%0, %1, %2}
+   and{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "alu")
-   (set_attr "mode" "SI,DI,DI")])
+   (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
+   (set_attr "mode" "SI,DI,DI,SI,DI,DI")])
 
 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand
 (define_insn "*andsi_2_zext"
   [(set (reg FLAGS_REG)
        (compare (and:SI
-                 (match_operand:SI 1 "nonimmediate_operand" "%0")
-                 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+                 (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+                 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
                 (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
-   && ix86_binary_operator_ok (AND, SImode, operands)"
-  "and{l}\t{%2, %k0|%k0, %2}"
+   && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
+  "@
+  and{l}\t{%2, %k0|%k0, %2}
+  and{l}\t{%2, %1, %k0|%k0, %1, %2}
+  and{l}\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,apx_ndd,apx_ndd")
    (set_attr "mode" "SI")])
 
 (define_insn "*andqi_2_maybe_si"
   [(set (reg FLAGS_REG)
        (compare (and:QI
-                 (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
-                 (match_operand:QI 2 "general_operand" "qn,m,n"))
+                 (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r")
+                 (match_operand:QI 2 "general_operand" "qn,m,n,rn,m"))
                 (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r")
        (and:QI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (AND, QImode, operands)
+  "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)
    && ix86_match_ccmode (insn,
                         CONST_INT_P (operands[2])
                         && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
         operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
       return "and{l}\t{%2, %k0|%k0, %2}";
     }
+  if (which_alternative > 2)
+    return "and{b}\t{%2, %1, %0|%0, %1, %2}";
   return "and{b}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
    (set (attr "mode")
-     (cond [(eq_attr "alternative" "2")
+     (cond [(eq_attr "alternative" "3,4")
+             (const_string "QI")
+           (eq_attr "alternative" "2")
              (const_string "SI")
            (and (match_test "optimize_insn_for_size_p ()")
                 (and (match_operand 0 "ext_QIreg_operand")
 (define_insn "*and<mode>_2"
   [(set (reg FLAGS_REG)
        (compare (and:SWI124
-                 (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
-                 (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>"))
+                 (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r")
+                 (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
                 (const_int 0)))
-   (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
        (and:SWI124 (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCNOmode)
-   && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
-  "and{<imodesuffix>}\t{%2, %0|%0, %2}"
+   && ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  and{<imodesuffix>}\t{%2, %0|%0, %2}
+  and{<imodesuffix>}\t{%2, %0|%0, %2}
+  and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,*,apx_ndd,apx_ndd")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<code>qi_ext<mode>_0"
 ;; Don't do the splitting with memory operands, since it introduces risk
 ;; of memory mismatch stalls.  We may want to do the splitting for optimizing
 ;; for size, but that can (should?) be handled by generic code instead.
+;; Don't do the splitting for APX NDD as NDD does not support *h registers.
 (define_split
   [(set (match_operand:SWI248 0 "QIreg_operand")
        (and:SWI248 (match_operand:SWI248 1 "register_operand")
    (clobber (reg:CC FLAGS_REG))]
    "reload_completed
     && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-    && !(~INTVAL (operands[2]) & ~(255 << 8))"
+    && !(~INTVAL (operands[2]) & ~(255 << 8))
+    && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
   [(parallel
      [(set (zero_extract:HI (match_dup 0)
                            (const_int 8)
    "reload_completed
     && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
     && !(~INTVAL (operands[2]) & ~255)
-    && !(INTVAL (operands[2]) & 128)"
+    && !(INTVAL (operands[2]) & 128)
+    && !(TARGET_APX_NDD
+        && !rtx_equal_p (operands[0], operands[1]))"
   [(parallel [(set (strict_low_part (match_dup 0))
                   (and:QI (match_dup 1)
                           (match_dup 2)))
          (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
          (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI
-   || (TARGET_AVX512BW && (<MODE>mode == SImode || TARGET_EVEX512))"
+  "TARGET_BMI || TARGET_AVX512BW"
   "@
    andn\t{%2, %1, %0|%0, %1, %2}
    andn\t{%2, %1, %0|%0, %1, %2}
    #"
-  [(set_attr "isa" "bmi,bmi,<kmov_isa>")
+  [(set_attr "isa" "bmi,bmi,avx512bw")
    (set_attr "type" "bitmanip,bitmanip,msklog")
    (set_attr "btver2_decode" "direct, double,*")
    (set_attr "mode" "<MODE>")])
       && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
     operands[2] = force_reg (<MODE>mode, operands[2]);
 
-  ix86_expand_binary_operator (<CODE>, <MODE>mode, operands);
+  ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
   DONE;
 })
 
 (define_insn_and_split "*<code><dwi>3_doubleword"
-  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r,&r")
        (any_or:<DWI>
-        (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
-        (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+        (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,rjO,r")
+        (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r,<di>,o")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands)"
+  "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(const_int:DWIH 0)]
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
 
   if (operands[2] == const0_rtx)
-    emit_insn_deleted_note_p = true;
+    {
+      if (!rtx_equal_p (operands[0], operands[1]))
+       emit_move_insn (operands[0], operands[1]);
+      else
+       emit_insn_deleted_note_p = true;
+    }
   else if (operands[2] == constm1_rtx)
     {
       if (<CODE> == IOR)
        emit_move_insn (operands[0], constm1_rtx);
       else
-       ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0]);
+       ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0],
+                                   TARGET_APX_NDD);
     }
   else
-    ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0]);
+    ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0],
+                                TARGET_APX_NDD);
 
   if (operands[5] == const0_rtx)
     {
-      if (emit_insn_deleted_note_p)
+      if (!rtx_equal_p (operands[3], operands[4]))
+       emit_move_insn (operands[3], operands[4]);
+      else if (emit_insn_deleted_note_p)
        emit_note (NOTE_INSN_DELETED);
     }
   else if (operands[5] == constm1_rtx)
       if (<CODE> == IOR)
        emit_move_insn (operands[3], constm1_rtx);
       else
-       ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3]);
+       ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3],
+                                   TARGET_APX_NDD);
     }
   else
-    ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3]);
+    ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3],
+                                TARGET_APX_NDD);
 
   DONE;
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")])
 
+;; Alternatives 0-1 are the two-operand forms (operand 1 is tied to the
+;; destination by the "0" matching constraint).  Alternatives 2-4 are
+;; three-operand forms that name operand 1 separately in the template and
+;; are enabled only for isa "apx_ndd".  Alternative 5 uses the "k"
+;; constraint on every operand, has type msklog and is emitted as "#".
 (define_insn "*<code><mode>_1"
-  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,r,?k")
        (any_or:SWI248
-        (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
-        (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k")))
+        (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,rjM,r,k")
+        (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r,<i>,<m>,k")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
   "@
    <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
    <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+   <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+   <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
    #"
-  [(set_attr "isa" "*,*,<kmov_isa>")
-   (set_attr "type" "alu, alu, msklog")
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>")
+   (set_attr "type" "alu, alu, alu, alu, alu, msklog")
    (set_attr "mode" "<MODE>")])
 
 (define_insn_and_split "*notxor<mode>_1"
-  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
        (not:SWI248
          (xor:SWI248
-           (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
-           (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k"))))
+           (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
+           (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k"))))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (XOR, <MODE>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel
       DONE;
     }
 }
-  [(set_attr "isa" "*,*,<kmov_isa>")
-   (set_attr "type" "alu, alu, msklog")
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
+   (set_attr "type" "alu, alu, alu, alu, msklog")
    (set_attr "mode" "<MODE>")])
 
 (define_insn_and_split "*iordi_1_bts"
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
+;; Alternative 0 is the two-operand form (operand 1 tied to the
+;; destination); alternatives 1-3 are three-operand forms enabled only for
+;; isa "apx_ndd".  The RTL result is a zero_extend:DI of the SImode
+;; operation and the destination is always printed with %k0.
 (define_insn "*<code>si_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI
-        (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                   (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+        (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,rjM,r")
+                   (match_operand:SI 2 "x86_64_general_operand" "rBMe,r,e,BM"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
+  "@
+  <logic>{l}\t{%2, %k0|%k0, %2}
+  <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
+  <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
+  <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
    (set_attr "mode" "SI")])
 
+;; Variant where the zero_extend is applied to operand 1 and operand 2 is
+;; a DImode immediate accepted by x86_64_zext_immediate_operand ("Z").
+;; Alternative 1 (isa "apx_ndd") takes operand 1 from a register or memory
+;; and writes a separate destination register.
 (define_insn "*<code>si_1_zext_imm"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (any_or:DI
-        (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
-        (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0,rm"))
+        (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z,Z")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
+  "@
+  <logic>{l}\t{%2, %k0|%k0, %2}
+  <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "SI")])
 
 (define_insn "*<code>qi_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
-       (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
-                  (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+       (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+                  (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, QImode, operands)"
+  "ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)"
   "@
    <logic>{b}\t{%2, %0|%0, %2}
    <logic>{b}\t{%2, %0|%0, %2}
    <logic>{l}\t{%k2, %k0|%k0, %k2}
+   <logic>{b}\t{%2, %1, %0|%0, %1, %2}
+   <logic>{b}\t{%2, %1, %0|%0, %1, %2}
    #"
-  [(set_attr "isa" "*,*,*,avx512f")
-   (set_attr "type" "alu,alu,alu,msklog")
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
+   (set_attr "type" "alu,alu,alu,alu,alu,msklog")
    (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (const_string "SI")
-               (and (eq_attr "alternative" "3")
+               (and (eq_attr "alternative" "5")
                     (match_test "!TARGET_AVX512DQ"))
                 (const_string "HI")
               ]
           (symbol_ref "true")))])
 
 (define_insn_and_split "*notxorqi_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
        (not:QI
-         (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
-                 (match_operand:QI 2 "general_operand" "qn,m,rn,k"))))
+         (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+                 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (XOR, QImode, operands)"
+  "ix86_binary_operator_ok (XOR, QImode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel
       DONE;
     }
 }
-  [(set_attr "isa" "*,*,*,avx512f")
-   (set_attr "type" "alu,alu,alu,msklog")
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
+   (set_attr "type" "alu,alu,alu,alu,alu,msklog")
    (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (const_string "SI")
-               (and (eq_attr "alternative" "3")
+               (and (eq_attr "alternative" "5")
                     (match_test "!TARGET_AVX512DQ"))
                 (const_string "HI")
               ]
+;; Flag-setting form: compares the result of the operation against zero
+;; and also stores the result in operand 0.  Requires that the insn match
+;; CCNOmode.  Alternatives 0-1 are two-operand forms; alternatives 2-3 are
+;; three-operand forms enabled only for isa "apx_ndd".
 (define_insn "*<code><mode>_2"
   [(set (reg FLAGS_REG)
        (compare (any_or:SWI
-                 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
-                 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+                 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
+                 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
                 (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
        (any_or:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCNOmode)
-   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+  <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+  <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,*,apx_ndd,apx_ndd")
    (set_attr "mode" "<MODE>")])
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 ;; ??? Special case for immediate operand is missing - it is tricky.
+;; Flag-setting SImode operation whose stored result is zero-extended to
+;; DImode.  Alternative 0 is the two-operand form; alternatives 1-2 are
+;; three-operand forms enabled only for isa "apx_ndd".
 (define_insn "*<code>si_2_zext"
   [(set (reg FLAGS_REG)
-       (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                           (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+       (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+                           (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
                 (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
-   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logic>{l}\t{%2, %k0|%k0, %2}"
+   && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
+  "@
+  <logic>{l}\t{%2, %k0|%k0, %2}
+  <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
+  <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,apx_ndd,apx_ndd")
    (set_attr "mode" "SI")])
 
+;; Flag-setting form with an immediate operand 2 ("Z"); the stored DImode
+;; result is expressed as the operation on the zero-extended operand 1.
+;; Alternative 1 (isa "apx_ndd") is the three-operand form.
 (define_insn "*<code>si_2_zext_imm"
   [(set (reg FLAGS_REG)
        (compare (any_or:SI
-                 (match_operand:SI 1 "nonimmediate_operand" "%0")
-                 (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
+                 (match_operand:SI 1 "nonimmediate_operand" "%0,rm")
+                 (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z,Z"))
                 (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r")
        (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
-   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logic>{l}\t{%2, %k0|%k0, %2}"
+   && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
+  "@
+  <logic>{l}\t{%2, %k0|%k0, %2}
+  <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "alu")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "SI")])
 
 (define_insn "*<code><mode>_3"
 ;; Don't do the splitting with memory operands, since it introduces risk
 ;; of memory mismatch stalls.  We may want to do the splitting for optimizing
 ;; for size, but that can (should?) be handled by generic code instead.
+;; Don't do the splitting for APX NDD as NDD does not support *h registers.
 (define_split
   [(set (match_operand:SWI248 0 "QIreg_operand")
        (any_or:SWI248 (match_operand:SWI248 1 "register_operand")
    (clobber (reg:CC FLAGS_REG))]
    "reload_completed
     && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-    && !(INTVAL (operands[2]) & ~(255 << 8))"
+    && !(INTVAL (operands[2]) & ~(255 << 8))
+    && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
   [(parallel
      [(set (zero_extract:HI (match_dup 0)
                            (const_int 8)
    "reload_completed
     && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
     && !(INTVAL (operands[2]) & ~255)
-    && (INTVAL (operands[2]) & 128)"
+    && (INTVAL (operands[2]) & 128)
+    && !(TARGET_APX_NDD
+        && !rtx_equal_p (operands[0], operands[1]))"
   [(parallel [(set (strict_low_part (match_dup 0))
                   (any_or:QI (match_dup 1)
                              (match_dup 2)))
   [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
   ""
-  "ix86_expand_unary_operator (NEG, <MODE>mode, operands); DONE;")
+{
+  ix86_expand_unary_operator (NEG, <MODE>mode, operands, TARGET_APX_NDD);
+  DONE;
+})
 
 (define_insn_and_split "*neg<dwi>2_doubleword"
-  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
-       (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
+       (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_unary_operator_ok (NEG, <DWI>mode, operands)"
+  "ix86_unary_operator_ok (NEG, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(parallel
     [(set (match_dup 2)
          (neg:DWIH (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
-  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
+  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
+  [(set_attr "isa" "*,apx_ndd")])
 
 ;; Convert:
 ;;   mov %esi, %edx
      (clobber (reg:CC FLAGS_REG))])])
 
+;; Alternative 0 negates operand 0 in place (operand 1 tied by "0").
+;; Alternative 1 (isa "apx_ndd") reads operand 1 from a register or memory
+;; and writes a separate destination register.
 (define_insn "*neg<mode>_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-       (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
+       (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
-  "neg{<imodesuffix>}\t%0"
+  "ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+  neg{<imodesuffix>}\t%0
+  neg{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "negnot")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "<MODE>")])
 
+;; SImode negation whose result is zero-extended to DImode.  Alternative 0
+;; is the in-place form; alternative 1 (isa "apx_ndd") takes operand 1 from
+;; a register or memory.  Both operands are printed with the %k modifier.
 (define_insn "*negsi_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI
-         (neg:SI (match_operand:SI 1 "register_operand" "0"))))
+         (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
-  "neg{l}\t%k0"
+  "TARGET_64BIT
+   && ix86_unary_operator_ok (NEG, SImode, operands, TARGET_APX_NDD)"
+  "@
+  neg{l}\t%k0
+  neg{l}\t{%k1, %k0|%k0, %k1}"
   [(set_attr "type" "negnot")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "SI")])
 
 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
+;; Flag-setting negation: compares the negated operand 1 against zero and
+;; stores the negated value in operand 0.  Requires that the insn match
+;; CCGOCmode.  Alternative 1 (isa "apx_ndd") is the two-address-free form
+;; with a separate source operand.
 (define_insn "*neg<mode>_2"
   [(set (reg FLAGS_REG)
        (compare
-         (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
+         (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
          (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (neg:SWI (match_dup 1)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
-  "neg{<imodesuffix>}\t%0"
+   && ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
+  "@
+   neg{<imodesuffix>}\t%0
+   neg{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "negnot")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "<MODE>")])
 
+;; Flag-setting SImode negation whose stored result is zero-extended to
+;; DImode.  Alternative 1 (isa "apx_ndd") takes operand 1 from a register
+;; or memory.
+;; NOTE(review): alternative 1 prints the source as %1 whereas
+;; *negsi_1_zext prints it as %k1; operand 1 is SImode in both, so the
+;; output is presumably the same -- confirm and consider unifying.
 (define_insn "*negsi_2_zext"
   [(set (reg FLAGS_REG)
        (compare
-         (neg:SI (match_operand:SI 1 "register_operand" "0"))
+         (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
          (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI
          (neg:SI (match_dup 1))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_unary_operator_ok (NEG, SImode, operands)"
-  "neg{l}\t%k0"
+   && ix86_unary_operator_ok (NEG, SImode, operands, TARGET_APX_NDD)"
+  "@
+   neg{l}\t%k0
+   neg{l}\t{%1, %k0|%k0, %1}"
   [(set_attr "type" "negnot")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "SI")])
 
+;; Sets the CCC flags register from an UNSPEC_CC_NE of operand 1 and zero,
+;; and stores the negation of operand 1 in operand 0.  The insn condition
+;; is empty; alternative 1 is restricted through isa "apx_ndd".
 (define_insn "*neg<mode>_ccc_1"
   [(set (reg:CCC FLAGS_REG)
        (unspec:CCC
-         [(match_operand:SWI 1 "nonimmediate_operand" "0")
+         [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
           (const_int 0)] UNSPEC_CC_NE))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (neg:SWI (match_dup 1)))]
   ""
-  "neg{<imodesuffix>}\t%0"
+  "@
+  neg{<imodesuffix>}\t%0
+  neg{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "negnot")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "<MODE>")])
 
+;; Sets the CCC flags register from an UNSPEC_CC_NE of operand 1 and zero;
+;; operand 0 is only a clobbered scratch register.  Alternative 1
+;; (isa "apx_ndd") allows operand 1 in a register or memory distinct from
+;; the scratch.
 (define_insn "*neg<mode>_ccc_2"
   [(set (reg:CCC FLAGS_REG)
        (unspec:CCC
-         [(match_operand:SWI 1 "nonimmediate_operand" "0")
+         [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
           (const_int 0)] UNSPEC_CC_NE))
-   (clobber (match_scratch:SWI 0 "=<r>"))]
+   (clobber (match_scratch:SWI 0 "=<r>,r"))]
   ""
-  "neg{<imodesuffix>}\t%0"
+  "@
+  neg{<imodesuffix>}\t%0
+  neg{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "negnot")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "<MODE>")])
 
 (define_expand "x86_neg<mode>_ccc"
   [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
   ""
-  "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
+{
+  ix86_expand_unary_operator (NOT, <MODE>mode, operands, TARGET_APX_NDD);
+  DONE;
+})
 
+;; Double-word NOT.  After reload it is split into two DWIH-mode NOTs on
+;; the halves produced by split_double_mode.  Alternative 1
+;; (isa "apx_ndd") uses an earlyclobber register destination ("&r") with
+;; operand 1 under the "ro" constraint instead of tying it to operand 0.
 (define_insn_and_split "*one_cmpl<dwi>2_doubleword"
-  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
-       (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))]
-  "ix86_unary_operator_ok (NOT, <DWI>mode, operands)"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
+       (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))]
+  "ix86_unary_operator_ok (NOT, <DWI>mode, operands, TARGET_APX_NDD)"
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
        (not:DWIH (match_dup 1)))
    (set (match_dup 2)
        (not:DWIH (match_dup 3)))]
-  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
+  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
+  [(set_attr "isa" "*,apx_ndd")])
 
+;; Alternative 0 is the in-place NOT, alternative 1 (isa "apx_ndd") reads
+;; operand 1 from a register or memory and writes a separate register, and
+;; alternative 2 uses the "k" constraint, has type msklog and is emitted
+;; as "#".
 (define_insn "*one_cmpl<mode>2_1"
-  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,?k")
-       (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))]
-  "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+       (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,rm,k")))]
+  "ix86_unary_operator_ok (NOT, <MODE>mode, operands, TARGET_APX_NDD)"
   "@
    not{<imodesuffix>}\t%0
+   not{<imodesuffix>}\t{%1, %0|%0, %1}
    #"
-  [(set_attr "isa" "*,<kmov_isa>")
-   (set_attr "type" "negnot,msklog")
+  [(set_attr "isa" "*,apx_ndd,<kmov_isa>")
+   (set_attr "type" "negnot,negnot,msklog")
    (set_attr "mode" "<MODE>")])
 
+;; SImode NOT whose result is zero-extended to DImode.  Alternative 0
+;; (isa "x64") is the in-place form, alternative 1 (isa "apx_ndd") takes
+;; operand 1 from a register or memory, and alternative 2
+;; (isa "avx512bw") uses the "k" constraint and is emitted as "#".
 (define_insn "*one_cmplsi2_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,?k")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,?k")
        (zero_extend:DI
-         (not:SI (match_operand:SI 1 "register_operand" "0,k"))))]
-  "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
+         (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,k"))))]
+  "TARGET_64BIT
+   && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
   "@
    not{l}\t%k0
+   not{l}\t{%1, %k0|%k0, %1}
    #"
-  [(set_attr "isa" "x64,avx512bw_512")
-   (set_attr "type" "negnot,msklog")
-   (set_attr "mode" "SI,SI")])
+  [(set_attr "isa" "x64,apx_ndd,avx512bw")
+   (set_attr "type" "negnot,negnot,msklog")
+   (set_attr "mode" "SI,SI,SI")])
 
 (define_insn "*one_cmplqi2_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,?k")
-       (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
-  "ix86_unary_operator_ok (NOT, QImode, operands)"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r,?k")
+       (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,rm,k")))]
+  "ix86_unary_operator_ok (NOT, QImode, operands, TARGET_APX_NDD)"
   "@
    not{b}\t%0
    not{l}\t%k0
+   not{b}\t{%1, %0|%0, %1}
    #"
-  [(set_attr "isa" "*,*,avx512f")
-   (set_attr "type" "negnot,negnot,msklog")
+  [(set_attr "isa" "*,*,apx_ndd,avx512f")
+   (set_attr "type" "negnot,negnot,negnot,msklog")
    (set (attr "mode")
        (cond [(eq_attr "alternative" "1")
                 (const_string "SI")
-               (and (eq_attr "alternative" "2")
+               (and (eq_attr "alternative" "3")
                     (match_test "!TARGET_AVX512DQ"))
                 (const_string "HI")
               ]
 
+;; Flag-setting NOT: compares the complemented operand 1 against zero and
+;; stores the complement in operand 0.  The insn has no assembler template
+;; of its own (it is emitted as "#").  Alternative 1 is restricted to
+;; isa "apx_ndd".
 (define_insn "*one_cmpl<mode>2_2"
   [(set (reg FLAGS_REG)
-       (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
+       (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
                 (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (not:SWI (match_dup 1)))]
   "ix86_match_ccmode (insn, CCNOmode)
-   && ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
+   && ix86_unary_operator_ok (NOT, <MODE>mode, operands, TARGET_APX_NDD)"
   "#"
   [(set_attr "type" "alu1")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "<MODE>")])
 
 (define_split
 
+;; Flag-setting SImode NOT whose stored result is zero-extended to DImode.
+;; Emitted as "#".  Alternative 1 (isa "apx_ndd") allows operand 1 in a
+;; register or memory rather than tied to the destination.
 (define_insn "*one_cmplsi2_2_zext"
   [(set (reg FLAGS_REG)
-       (compare (not:SI (match_operand:SI 1 "register_operand" "0"))
+       (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
                 (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI (not:SI (match_dup 1))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
-   && ix86_unary_operator_ok (NOT, SImode, operands)"
+   && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
   "#"
   [(set_attr "type" "alu1")
+   (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "SI")])
 
 (define_split
   [(set (match_operand 0 "flags_reg_operand")
        (match_operator 2 "compare_operator"
-         [(not:SI (match_operand:SI 3 "register_operand"))
+         [(not:SI (match_operand:SI 3 "nonimmediate_operand"))
           (const_int 0)]))
    (set (match_operand:DI 1 "register_operand")
        (zero_extend:DI (not:SI (match_dup 3))))]
        (ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>")
                      (match_operand:QI 2 "nonmemory_operand")))]
   ""
-  "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;")
+{
+  ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD);
+  DONE;
+})
 
 (define_insn_and_split "*ashl<dwi>3_doubleword_mask"
   [(set (match_operand:<DWI> 0 "register_operand")
 })
 
+;; Double-word left shift, emitted as "#".  The destination is an
+;; earlyclobber register in both alternatives.  Alternative 0 ties
+;; operand 1 to the destination (or accepts a constant, "0n");
+;; alternative 1 (isa "apx_ndd") accepts operand 1 in any register.
 (define_insn "ashl<mode>3_doubleword"
-  [(set (match_operand:DWI 0 "register_operand" "=&r")
-       (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
-                   (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+  [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
+       (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n,r")
+                   (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
    (clobber (reg:CC FLAGS_REG))]
   ""
   "#"
-  [(set_attr "type" "multi")])
+  [(set_attr "type" "multi")
+   (set_attr "isa" "*,apx_ndd")])
 
 (define_split
   [(set (match_operand:DWI 0 "register_operand")
    (clobber (reg:CC FLAGS_REG))]
   "epilogue_completed"
   [(const_int 0)]
-  "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")
+{
+  if (TARGET_APX_NDD
+      && !rtx_equal_p (operands[0], operands[1])
+      && REG_P (operands[1]))
+    ix86_split_ashl_ndd (operands, NULL_RTX);
+  else
+    ix86_split_ashl (operands, NULL_RTX, <MODE>mode);
+  DONE;
+})
 
 ;; By default we don't ask for a scratch register, because when DWImode
 ;; values are manipulated, registers are already at a premium.  But if
    (match_dup 3)]
   "TARGET_CMOVE"
   [(const_int 0)]
-  "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
+{
+  if (TARGET_APX_NDD
+      && !rtx_equal_p (operands[0], operands[1])
+      && (REG_P (operands[1])))
+    ix86_split_ashl_ndd (operands, operands[3]);
+  else
+    ix86_split_ashl (operands, operands[3], <DWI>mode);
+  DONE;
+})
 
 (define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
   [(set (match_operand:<DWI> 0 "register_operand" "=r")
 {
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
   int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
-  if (!rtx_equal_p (operands[3], operands[1]))
-    emit_move_insn (operands[3], operands[1]);
-  if (bits > 0)
-    emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits)));
+  bool op_equal_p = rtx_equal_p (operands[3], operands[1]);
+  if (bits == 0)
+    {
+      if (!op_equal_p)
+       emit_move_insn (operands[3], operands[1]);
+    }
+  else
+    {
+      if (!op_equal_p && !TARGET_APX_NDD)
+       emit_move_insn (operands[3], operands[1]);
+      rtx op_tmp = TARGET_APX_NDD ? operands[1] : operands[3];
+      emit_insn (gen_ashl<mode>3 (operands[3], op_tmp, GEN_INT (bits)));
+    }
   ix86_expand_clear (operands[0]);
   DONE;
 })
                              (and:QI (match_dup 2) (const_int 63)))) 0)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "shld{q}\t{%s2%1, %0|%0, %1, %2}"
+  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; Three-source-operand DImode shld, enabled only under TARGET_APX_NDD:
+;;   op0 = (op1 << (op3 & 63))
+;;         | subreg:DI ((zero_extend:TI op2) >> (64 - (op3 & 63))), byte 0.
+;; Operand 0 is a write-only register, operand 1 may be a register or
+;; memory, operand 2 must be a register, and operand 3 is the shift count
+;; under the "Jc" constraint.
+(define_insn "x86_64_shld_ndd"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+                 (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+                         (const_int 63)))
+               (subreg:DI
+                 (lshiftrt:TI
+                   (zero_extend:TI
+                     (match_operand:DI 2 "register_operand" "r"))
+                   (minus:QI (const_int 64)
+                             (and:QI (match_dup 3) (const_int 63)))) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD"
+  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
 (define_insn "x86_64_shld_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; Constant-count variant of the DImode NDD shld.  Operand 3 is the left
+;; shift count (0..63) and operand 4 the right shift count applied to the
+;; zero-extended operand 2; the insn condition requires
+;; operands[4] == 64 - operands[3].  Only operand 3 is printed in the
+;; template.
+(define_insn "x86_64_shld_ndd_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+                          (match_operand:QI 3 "const_0_to_63_operand"))
+               (subreg:DI
+                 (lshiftrt:TI
+                   (zero_extend:TI
+                     (match_operand:DI 2 "register_operand" "r"))
+                   (match_operand:QI 4 "const_0_to_255_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "1")])
+
+
 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
       operands[4] = force_reg (DImode, operands[4]);
       emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
     }
+  else if (TARGET_APX_NDD)
+    {
+     rtx tmp = gen_reg_rtx (DImode);
+     if (MEM_P (operands[4]))
+       {
+        operands[1] = force_reg (DImode, operands[1]);
+        emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
+                                          operands[2], operands[3]));
+       }
+     else if (MEM_P (operands[1]))
+       emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[1], operands[4],
+                                        operands[3], operands[2]));
+     else
+       emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
+                                        operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+    }
   else
    {
      operands[1] = force_reg (DImode, operands[1]);
                                      (and:QI (match_dup 2) (const_int 63)))
                           (subreg:DI
                             (lshiftrt:TI
-                              (zero_extend:TI (match_dup 1))
+                              (zero_extend:TI (match_dup 1))
+                                (minus:QI (const_int 64)
+                                          (and:QI (match_dup 2)
+                                                  (const_int 63)))) 0)))
+             (clobber (reg:CC FLAGS_REG))])])
+
+;; Pre-reload pattern (ix86_pre_reload_split) for TARGET_APX_NDD matching
+;; (op1 << op3) | (op2 >> (64 - op3)) in DImode without explicit masking
+;; of the count.  It is split unconditionally ("&& 1") into the masked
+;; shld form computed into a fresh pseudo (operands[4]), in a parallel
+;; that also copies that pseudo to operand 0.
+;; NOTE(review): the preparation code also emits a move of operand 0 into
+;; operands[4] ahead of the split insns -- confirm this copy is intended.
+(define_insn_and_split "*x86_64_shld_ndd_2"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+       (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand")
+                          (match_operand:QI 3 "nonmemory_operand"))
+               (lshiftrt:DI (match_operand:DI 2 "register_operand")
+                            (minus:QI (const_int 64) (match_dup 3)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 4)
+                  (ior:DI (ashift:DI (match_dup 1)
+                                     (and:QI (match_dup 3) (const_int 63)))
+                          (subreg:DI
+                            (lshiftrt:TI
+                              (zero_extend:TI (match_dup 2))
                                 (minus:QI (const_int 64)
-                                          (and:QI (match_dup 2)
+                                          (and:QI (match_dup 3)
                                                   (const_int 63)))) 0)))
-             (clobber (reg:CC FLAGS_REG))])])
+             (clobber (reg:CC FLAGS_REG))
+             (set (match_dup 0) (match_dup 4))])]
+{
+  operands[4] = gen_reg_rtx (DImode);
+  emit_move_insn (operands[4], operands[0]);
+})
 
 (define_insn "x86_shld"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
                              (and:QI (match_dup 2) (const_int 31)))) 0)))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "shld{l}\t{%s2%1, %0|%0, %1, %2}"
+  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; Three-source-operand SImode shld, enabled only under TARGET_APX_NDD:
+;;   op0 = (op1 << (op3 & 31))
+;;         | subreg:SI ((zero_extend:DI op2) >> (32 - (op3 & 31))), byte 0.
+;; Operand 1 may be a register or memory, operand 2 must be a register,
+;; and operand 3 is the shift count under the "Ic" constraint.
+;; NOTE(review): operand 0 uses the nonimmediate_operand predicate with a
+;; register-only constraint ("=r"), whereas x86_64_shld_ndd uses
+;; register_operand -- confirm whether the looser predicate is intended.
+(define_insn "x86_shld_ndd"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+                 (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+                         (const_int 31)))
+               (subreg:SI
+                 (lshiftrt:DI
+                   (zero_extend:DI
+                     (match_operand:SI 2 "register_operand" "r"))
+                   (minus:QI (const_int 32)
+                             (and:QI (match_dup 3) (const_int 31)))) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD"
+  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+
 (define_insn "x86_shld_1"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0)
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; Constant-count variant of the SImode NDD SHLD.  Operand 3 is the left
+;; shift count (0..31) and operand 4 the complementary right shift count
+;; applied to the zero-extended operand 2; the insn condition requires
+;; operand 4 == 32 - operand 3.  Enabled only with TARGET_APX_NDD and
+;; clobbers the flags register.
+(define_insn "x86_shld_ndd_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+                          (match_operand:QI 3 "const_0_to_31_operand"))
+               (subreg:SI
+                 (lshiftrt:DI
+                   (zero_extend:DI
+                     (match_operand:SI 2 "register_operand" "r"))
+                   (match_operand:QI 4 "const_0_to_63_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD 
+   && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
+  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")])
+
+
 (define_insn_and_split "*x86_shld_shrd_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
       operands[4] = force_reg (SImode, operands[4]);
       emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
     }
-  else
+  else if (TARGET_APX_NDD)
+    {
+     rtx tmp = gen_reg_rtx (SImode);
+     if (MEM_P (operands[4]))
+       {
+        operands[1] = force_reg (SImode, operands[1]);
+        emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
+                                       operands[2], operands[3]));
+       }
+     else if (MEM_P (operands[1]))
+       emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[1], operands[4],
+                                     operands[3], operands[2]));
+     else
+       emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
+                                     operands[2], operands[3]));
+     emit_move_insn (operands[0], tmp);
+    }
+ else
    {
      operands[1] = force_reg (SImode, operands[1]);
      rtx tmp = gen_reg_rtx (SImode);
                                                   (const_int 31)))) 0)))
              (clobber (reg:CC FLAGS_REG))])])
 
+;; Pre-reload splitter for the NDD form of an SImode double-word left
+;; shift with an unmasked count:
+;;   op0 = (op1 << op3) | (op2 >> (32 - op3))
+;; It is rewritten into the masked SHLD form, computed into a fresh
+;; pseudo (operand 4), followed by a separate copy of that pseudo into
+;; operand 0.  The copy must be its own insn: placing it inside the
+;; PARALLEL would yield an insn matching no pattern and would read the
+;; pseudo before the shift writes it.
+(define_insn_and_split "*x86_shld_ndd_2"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+       (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
+                          (match_operand:QI 3 "nonmemory_operand"))
+               (lshiftrt:SI (match_operand:SI 2 "register_operand")
+                            (minus:QI (const_int 32) (match_dup 3)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 4)
+                  (ior:SI (ashift:SI (match_dup 1)
+                                     (and:QI (match_dup 3) (const_int 31)))
+                          (subreg:SI
+                            (lshiftrt:DI
+                              (zero_extend:DI (match_dup 2))
+                                (minus:QI (const_int 32)
+                                          (and:QI (match_dup 3)
+                                                  (const_int 31)))) 0)))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 4))]
+{
+  /* Operand 4 is a pure output of the shift, so it needs no initial
+     value; operand 0 is only written by the final copy.  */
+  operands[4] = gen_reg_rtx (SImode);
+})
+
 (define_expand "@x86_shift<mode>_adj_1"
   [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
-       (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
-                     (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
+       (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
+                     (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         /* For NDD form instructions related to TARGET_SHIFT1, the $1
+            immediate do not need to be omitted as assembler will map it
+            to use shorter encoding. */
+         && !use_ndd)
        return "sal{<imodesuffix>}\t%0";
       else
-       return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,bmi2,<kmov_isa>")
+  [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
    (set (attr "type")
      (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "ishiftx")
+           (eq_attr "alternative" "4")
+             (const_string "ishift")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
    (set_attr "mode" "SI")])
 
 (define_insn "*ashlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI
-         (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
-                    (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
+         (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm")
+                    (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{l}\t%k0";
       else
-       return "sal{l}\t{%2, %k0|%k0, %2}";
+       return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                      : "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,bmi2")
+  [(set_attr "isa" "*,*,bmi2,apx_ndd")
    (set (attr "type")
      (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "ishiftx")
+           (eq_attr "alternative" "3")
+             (const_string "ishift")
             (and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
 (define_insn "*ashlhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
-       (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
-                  (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
+       (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
+                  (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+  "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{w}\t%0";
       else
-       return "sal{w}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sal{w}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,avx512f")
+  [(set_attr "isa" "*,*,avx512f,apx_ndd")
    (set (attr "type")
      (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "msklog")
+           (eq_attr "alternative" "3")
+             (const_string "ishift")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
                           (match_test "optimize_function_for_size_p (cfun)")))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "HI,SI,HI")])
+   (set_attr "mode" "HI,SI,HI,HI")])
 
 (define_insn "*ashlqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
-       (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
-                  (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
+       (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
+                  (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+  "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        {
          if (get_attr_mode (insn) == MODE_SI)
            return "sal{l}\t%k0";
          if (get_attr_mode (insn) == MODE_SI)
            return "sal{l}\t{%2, %k0|%k0, %2}";
          else
-           return "sal{b}\t{%2, %0|%0, %2}";
+           return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}"
+                          : "sal{b}\t{%2, %0|%0, %2}";
        }
     }
 }
-  [(set_attr "isa" "*,*,*,avx512dq")
+  [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
    (set (attr "type")
      (cond [(eq_attr "alternative" "2")
              (const_string "lea")
            (eq_attr "alternative" "3")
              (const_string "msklog")
+           (eq_attr "alternative" "4")
+             (const_string "ishift")
             (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
                           (match_test "optimize_function_for_size_p (cfun)")))))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "QI,SI,SI,QI")
+   (set_attr "mode" "QI,SI,SI,QI,QI")
    ;; Potential partial reg stall on alternative 1.
    (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "1")
+     (cond [(eq_attr "alternative" "1,4")
              (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
           (symbol_ref "true")))])
 
 (define_insn "*ashl<mode>3_cmp"
   [(set (reg FLAGS_REG)
        (compare
-         (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-                     (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+         (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+                     (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
          (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (ashift:SWI (match_dup 1) (match_dup 2)))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
        && (TARGET_SHIFT1
            || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{<imodesuffix>}\t%0";
       else
-       return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
-     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+  [(set_attr "isa" "*,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+             (const_string "ishift")
+           (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
 (define_insn "*ashlsi3_cmp_zext"
   [(set (reg FLAGS_REG)
        (compare
-         (ashift:SI (match_operand:SI 1 "register_operand" "0")
+         (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
                     (match_operand:QI 2 "const_1_to_31_operand"))
          (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT
    && (optimize_function_for_size_p (cfun)
           && (TARGET_SHIFT1
               || TARGET_DOUBLE_WITH_ADD)))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{l}\t%k0";
       else
-       return "sal{l}\t{%2, %k0|%k0, %2}";
+       return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                      : "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
-     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
+  [(set_attr "isa" "*,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+             (const_string "ishift")
+           (and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
 (define_insn "*ashl<mode>3_cconly"
   [(set (reg FLAGS_REG)
        (compare
-         (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
-                     (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+         (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+                     (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
          (const_int 0)))
-   (clobber (match_scratch:SWI 0 "=<r>"))]
+   (clobber (match_scratch:SWI 0 "=<r>,r"))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
            || TARGET_DOUBLE_WITH_ADD)))
    && ix86_match_ccmode (insn, CCGOCmode)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       return "add{<imodesuffix>}\t%0, %0";
 
-    default:
+  default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sal{<imodesuffix>}\t%0";
       else
-       return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
-     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+  [(set_attr "isa" "*,apx_ndd")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+             (const_string "ishift")
+           (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
    && !(rtx_equal_p (operands[0], operands[1]))"
   [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
-       (match_dup 1))
+       (zero_extract:SWI248
+         (match_dup 1) (const_int 8) (const_int 8)))
    (parallel
      [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
        (any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>")
                           (match_operand:QI 2 "nonmemory_operand")))]
   ""
-  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+{
+  ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
+  DONE;
+})
 
 ;; Avoid useless masking of count operand.
 (define_insn_and_split "*<insn><mode>3_mask"
 })
 
+;; Double-word right shifts (any_shiftrt on DWI modes), emitted as "#" and
+;; split once epilogue_completed is set.  The second alternative (isa
+;; apx_ndd) allows the source register to differ from the destination.
+;; When TARGET_APX_NDD is on and operands 0 and 1 are different, the
+;; split goes through ix86_split_rshift_ndd; otherwise it uses
+;; ix86_split_<insn>.
 (define_insn_and_split "<insn><mode>3_doubleword"
-  [(set (match_operand:DWI 0 "register_operand" "=&r")
-       (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
-                        (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+  [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
+       (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0,r")
+                        (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
    (clobber (reg:CC FLAGS_REG))]
   ""
   "#"
   "epilogue_completed"
   [(const_int 0)]
-  "ix86_split_<insn> (operands, NULL_RTX, <MODE>mode); DONE;"
-  [(set_attr "type" "multi")])
+{
+  if (TARGET_APX_NDD
+      && !rtx_equal_p (operands[0], operands[1]))
+    ix86_split_rshift_ndd (<CODE>, operands, NULL_RTX);
+  else
+    ix86_split_<insn> (operands, NULL_RTX, <MODE>mode);
+  DONE;
+}
+  [(set_attr "type" "multi")
+   (set_attr "isa" "*,apx_ndd")])
 
 ;; By default we don't ask for a scratch register, because when DWImode
 ;; values are manipulated, registers are already at a premium.  But if
    (match_dup 3)]
   "TARGET_CMOVE"
   [(const_int 0)]
-  "ix86_split_<insn> (operands, operands[3], <DWI>mode); DONE;")
+{
+  if (TARGET_APX_NDD
+      && !rtx_equal_p (operands[0], operands[1]))
+    ix86_split_rshift_ndd (<CODE>, operands, operands[3]);
+  else
+    ix86_split_<insn> (operands, operands[3], <DWI>mode);
+  DONE;
+})
 
 ;; Split truncations of double word right shifts into x86_shrd_1.
 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
                              (and:QI (match_dup 2) (const_int 63)))) 0)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; DImode SHRD in the new-data-destination form, enabled only with
+;; TARGET_APX_NDD:
+;;   op0 = (op1 >> (op3 & 63))
+;;         | DImode subreg 0 of (zero_extend:TI (op2) << (64 - (op3 & 63)))
+;; Operand 3 is the count (constraint "Jc").  The flags register is
+;; clobbered.
+(define_insn "x86_64_shrd_ndd"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+                 (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+                         (const_int 63)))
+               (subreg:DI
+                 (ashift:TI
+                   (zero_extend:TI
+                     (match_operand:DI 2 "register_operand" "r"))
+                   (minus:QI (const_int 64)
+                             (and:QI (match_dup 3) (const_int 63)))) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD"
+  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
+
 (define_insn "x86_64_shrd_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (lshiftrt:DI (match_dup 0)
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; Constant-count variant of the DImode NDD SHRD.  Operand 3 is the
+;; right shift count (0..63) and operand 4 the complementary left shift
+;; count applied to the zero-extended operand 2; the insn condition
+;; requires operand 4 == 64 - operand 3.  Enabled only with
+;; TARGET_APX_NDD and clobbers the flags register.
+(define_insn "x86_64_shrd_ndd_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+                            (match_operand:QI 3 "const_0_to_63_operand"))
+               (subreg:DI
+                 (ashift:TI
+                   (zero_extend:TI
+                     (match_operand:DI 2 "register_operand" "r"))
+                   (match_operand:QI 4 "const_0_to_255_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "DI")])
+
+
 (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
       operands[4] = force_reg (DImode, operands[4]);
       emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3], operands[2]));
     }
+  else if (TARGET_APX_NDD)
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      if (MEM_P (operands[4]))
+        {
+         operands[1] = force_reg (DImode, operands[1]);
+         emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
+                                           operands[2], operands[3]));
+        }
+       else if (MEM_P (operands[1]))
+         emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[1], operands[4],
+                                          operands[3], operands[2]));
+       else
+         emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
+                                          operands[2], operands[3]));
+       emit_move_insn (operands[0], tmp);
+    }
   else
    {
      operands[1] = force_reg (DImode, operands[1]);
                                                   (const_int 63)))) 0)))
              (clobber (reg:CC FLAGS_REG))])])
 
+;; Pre-reload splitter for the NDD form of a DImode double-word right
+;; shift with an unmasked count:
+;;   op0 = (op1 >> op3) | (op2 << (64 - op3))
+;; The complementary count must refer to operand 3 (the QImode count),
+;; not operand 2 (the DImode data register).  The shift is computed into
+;; a fresh pseudo (operand 4), followed by a separate copy of that
+;; pseudo into operand 0.  The copy must be its own insn: placing it
+;; inside the PARALLEL would yield an insn matching no pattern and would
+;; read the pseudo before the shift writes it.
+(define_insn_and_split "*x86_64_shrd_ndd_2"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+       (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand")
+                            (match_operand:QI 3 "nonmemory_operand"))
+               (ashift:DI (match_operand:DI 2 "register_operand")
+                          (minus:QI (const_int 64) (match_dup 3)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 4)
+                  (ior:DI (lshiftrt:DI (match_dup 1)
+                                       (and:QI (match_dup 3) (const_int 63)))
+                          (subreg:DI
+                            (ashift:TI
+                              (zero_extend:TI (match_dup 2))
+                                (minus:QI (const_int 64)
+                                          (and:QI (match_dup 3)
+                                                  (const_int 63)))) 0)))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 4))]
+{
+  /* Operand 4 is a pure output of the shift, so it needs no initial
+     value; operand 0 is only written by the final copy.  */
+  operands[4] = gen_reg_rtx (DImode);
+})
+
 (define_insn "x86_shrd"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
                              (and:QI (match_dup 2) (const_int 31)))) 0)))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; SImode SHRD in the new-data-destination form, enabled only with
+;; TARGET_APX_NDD:
+;;   op0 = (op1 >> (op3 & 31))
+;;         | SImode subreg 0 of (zero_extend:DI (op2) << (32 - (op3 & 31)))
+;; Operand 3 is the count (constraint "Ic").  The flags register is
+;; clobbered.
+(define_insn "x86_shrd_ndd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+                 (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+                         (const_int 31)))
+               (subreg:SI
+                 (ashift:DI
+                   (zero_extend:DI
+                     (match_operand:SI 2 "register_operand" "r"))
+                   (minus:QI (const_int 32)
+                             (and:QI (match_dup 3) (const_int 31)))) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD"
+  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
 (define_insn "x86_shrd_1"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+;; Constant-count variant of the SImode NDD SHRD.  Operand 3 is the
+;; right shift count (0..31) and operand 4 the complementary left shift
+;; count applied to the zero-extended operand 2; the insn condition
+;; requires operand 4 == 32 - operand 3.  Enabled only with
+;; TARGET_APX_NDD and clobbers the flags register.
+(define_insn "x86_shrd_ndd_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+                            (match_operand:QI 3 "const_0_to_31_operand"))
+               (subreg:SI
+                 (ashift:DI
+                   (zero_extend:DI
+                     (match_operand:SI 2 "register_operand" "r"))
+                   (match_operand:QI 4 "const_0_to_63_operand")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
+  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ishift")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "SI")])
+
+
 (define_insn_and_split "*x86_shrd_shld_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
       operands[4] = force_reg (SImode, operands[4]);
       emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3], operands[2]));
     }
-  else
+  else if (TARGET_APX_NDD)
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      if (MEM_P (operands[4]))
+        {
+         operands[1] = force_reg (SImode, operands[1]);
+         emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
+                                        operands[2], operands[3]));
+        }
+      else if (MEM_P (operands[1]))
+        emit_insn (gen_x86_shld_ndd_1 (tmp, operands[1], operands[4],
+                                      operands[3], operands[2]));
+      else
+        emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
+                                      operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+     }
+   else
    {
      operands[1] = force_reg (SImode, operands[1]);
      rtx tmp = gen_reg_rtx (SImode);
                                                   (const_int 31)))) 0)))
              (clobber (reg:CC FLAGS_REG))])])
 
+;; Pre-reload splitter for the NDD form of an SImode double-word right
+;; shift with an unmasked count:
+;;   op0 = (op1 >> op3) | (op2 << (32 - op3))
+;; It is rewritten into the masked SHRD form, computed into a fresh
+;; pseudo (operand 4), followed by a separate copy of that pseudo into
+;; operand 0.  The copy must be its own insn: placing it inside the
+;; PARALLEL would yield an insn matching no pattern and would read the
+;; pseudo before the shift writes it.
+(define_insn_and_split "*x86_shrd_ndd_2"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+       (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
+                          (match_operand:QI 3 "nonmemory_operand"))
+               (ashift:SI (match_operand:SI 2 "register_operand")
+                          (minus:QI (const_int 32) (match_dup 3)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_NDD
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 4)
+                  (ior:SI (lshiftrt:SI (match_dup 1)
+                                       (and:QI (match_dup 3) (const_int 31)))
+                          (subreg:SI
+                            (ashift:DI
+                              (zero_extend:DI (match_dup 2))
+                                (minus:QI (const_int 32)
+                                          (and:QI (match_dup 3)
+                                                  (const_int 31)))) 0)))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 4))]
+{
+  /* Operand 4 is a pure output of the shift, so it needs no initial
+     value; operand 0 is only written by the final copy.  */
+  operands[4] = gen_reg_rtx (SImode);
+})
+
 ;; Base name for insn mnemonic.
 (define_mode_attr cvt_mnemonic
   [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
 
+;; Arithmetic right shift by (bit size of <MODE> - 1), as enforced by the
+;; insn condition.  Alternative 0 ("*a" source, "*d" destination) emits
+;; <cvt_mnemonic>; alternative 1 is the two-operand sar with source tied
+;; to the destination; alternative 2 (isa apx_ndd) is the three-operand
+;; sar with an independent register or memory source.
 (define_insn "ashr<mode>3_cvt"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
        (ashiftrt:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand" "*a,0")
+         (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
          (match_operand:QI 2 "const_int_operand")))
    (clobber (reg:CC FLAGS_REG))]
   "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
    && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
   "@
    <cvt_mnemonic>
-   sar{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "imovx,ishift")
-   (set_attr "prefix_0f" "0,*")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "modrm" "0,1")
+   sar{<imodesuffix>}\t{%2, %0|%0, %2}
+   sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd")
+   (set_attr "type" "imovx,ishift,ishift")
+   (set_attr "prefix_0f" "0,*,*")
+   (set_attr "length_immediate" "0,*,*")
+   (set_attr "modrm" "0,1,1")
    (set_attr "mode" "<MODE>")])
 
+;; Zero-extended-to-DImode variant of the SImode arithmetic right shift
+;; by 31 (64-bit targets only, per the insn condition).  Alternative 0
+;; emits cltd/cdq; alternative 1 is the two-operand sar on the SImode
+;; view (%k0) of the destination; alternative 2 (isa apx_ndd) is the
+;; three-operand sar with an independent register or memory source.
 (define_insn "*ashrsi3_cvt_zext"
-  [(set (match_operand:DI 0 "register_operand" "=*d,r")
+  [(set (match_operand:DI 0 "register_operand" "=*d,r,r")
        (zero_extend:DI
-         (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+         (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0,rm")
                       (match_operand:QI 2 "const_int_operand"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && INTVAL (operands[2]) == 31
    && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands, TARGET_APX_NDD)"
   "@
    {cltd|cdq}
-   sar{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "imovx,ishift")
-   (set_attr "prefix_0f" "0,*")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "modrm" "0,1")
+   sar{l}\t{%2, %k0|%k0, %2}
+   sar{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd")
+   (set_attr "type" "imovx,ishift,ishift")
+   (set_attr "prefix_0f" "0,*,*")
+   (set_attr "length_immediate" "0,*,*")
+   (set_attr "modrm" "0,1,1")
    (set_attr "mode" "SI")])
 
 (define_expand "@x86_shift<mode>_adj_3"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*ashr<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
        (ashiftrt:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
-         (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
+         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
+         (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFTX:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "sar{<imodesuffix>}\t%0";
       else
-       return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "ishift,ishiftx")
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
 ;; Specialization of *lshr<mode>3_1 below, extracting the SImode
 ;; highpart of a DI to be extracted, but allowing it to be clobbered.
 (define_insn_and_split "*highpartdisi2"
-  [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k") 0)
-        (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,0,k")
+  [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k,r") 0)
+        (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0,k,rm")
                     (const_int 32)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
       DONE;
     }
   operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-})
+}
+[(set_attr "isa" "*,*,*,apx_ndd")])
+
 
 (define_insn "*lshr<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
        (lshiftrt:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
-         (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
+         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
+         (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFTX:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "shr{<imodesuffix>}\t%0";
       else
-       return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2,<kmov_isa>")
-   (set_attr "type" "ishift,ishiftx,msklog")
+  [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,msklog,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (and (match_operand 2 "const1_operand")
    (set_attr "mode" "SI")])
 
 (define_insn "*<insn>si3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI
-         (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
-                         (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
+         (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
+                         (match_operand:QI 2 "nonmemory_operand" "cI,r,cI"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFTX:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "<shift>{l}\t%k0";
       else
-       return "<shift>{l}\t{%2, %k0|%k0, %2}";
+       return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                      : "<shift>{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "ishift,ishiftx")
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "ishift,ishiftx,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
 (define_insn "*ashr<mode>3_1"
-  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
        (ashiftrt:SWI12
-         (match_operand:SWI12 1 "nonimmediate_operand" "0")
-         (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+         (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
+         (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "sar{<imodesuffix>}\t%0";
   else
-    return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                  : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "*, apx_ndd")
+   (set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*lshrqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k")
+  [(set (match_operand:QI 0 "nonimmediate_operand"  "=qm,?k,r")
        (lshiftrt:QI
-         (match_operand:QI 1 "nonimmediate_operand" "0, k")
-         (match_operand:QI 2 "nonmemory_operand"    "cI,Wb")))
+         (match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
+         (match_operand:QI 2 "nonmemory_operand"    "cI,Wb,cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFT:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "shr{b}\t%0";
       else
-       return "shr{b}\t{%2, %0|%0, %2}";
+       return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}"
+                      : "shr{b}\t{%2, %0|%0, %2}";
     case TYPE_MSKLOG:
       return "#";
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "isa" "*,avx512dq")
-   (set_attr "type" "ishift,msklog")
+  [(set_attr "isa" "*,avx512dq,apx_ndd")
+   (set_attr "type" "ishift,msklog,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (and (match_operand 2 "const1_operand")
    (set_attr "mode" "QI")])
 
 (define_insn "*lshrhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k")
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
        (lshiftrt:HI
-         (match_operand:HI 1 "nonimmediate_operand" "0, k")
-         (match_operand:QI 2 "nonmemory_operand" "cI, Ww")))
+         (match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
+         (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ISHIFT:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "shr{w}\t%0";
       else
-       return "shr{w}\t{%2, %0|%0, %2}";
+       return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}"
+                      : "shr{w}\t{%2, %0|%0, %2}";
     case TYPE_MSKLOG:
       return "#";
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "isa" "*, avx512f")
-   (set_attr "type" "ishift,msklog")
+  [(set_attr "isa" "*, avx512f, apx_ndd")
+   (set_attr "type" "ishift,msklog,ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (and (match_operand 2 "const1_operand")
   [(set (reg FLAGS_REG)
        (compare
          (any_shiftrt:SWI
-           (match_operand:SWI 1 "nonimmediate_operand" "0")
-           (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+           (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+           (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
          (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
        && TARGET_SHIFT1))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<shift>{<imodesuffix>}\t%0";
   else
-    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return use_ndd ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                  : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
 (define_insn "*<insn>si3_cmp_zext"
   [(set (reg FLAGS_REG)
        (compare
-         (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
+         (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
                          (match_operand:QI 2 "const_1_to_31_operand"))
          (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT
    && (optimize_function_for_size_p (cfun)
        || (operands[2] == const1_rtx
           && TARGET_SHIFT1))
    && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+   && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<shift>{l}\t%k0";
   else
-    return "<shift>{l}\t{%2, %k0|%k0, %2}";
+    return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                  : "<shift>{l}\t{%2, %k0|%k0, %2}";
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
   [(set (reg FLAGS_REG)
        (compare
          (any_shiftrt:SWI
-           (match_operand:SWI 1 "register_operand" "0")
-           (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+           (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+           (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
          (const_int 0)))
-   (clobber (match_scratch:SWI 0 "=<r>"))]
+   (clobber (match_scratch:SWI 0 "=<r>,r"))]
   "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
        && TARGET_SHIFT1))
    && ix86_match_ccmode (insn, CCGOCmode)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<shift>{<imodesuffix>}\t%0";
   else
-    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return use_ndd
+          ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+          : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
    && !(rtx_equal_p (operands[0], operands[1]))"
   [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
-       (match_dup 1))
+       (zero_extract:SWI248
+         (match_dup 1) (const_int 8) (const_int 8)))
    (parallel
      [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
  ""
 {
   if (TARGET_64BIT)
-    ix86_expand_binary_operator (<CODE>, DImode, operands);
+    ix86_expand_binary_operator (<CODE>, DImode, operands, TARGET_APX_NDD);
   else if (const_1_to_31_operand (operands[2], VOIDmode))
     emit_insn (gen_ix86_<insn>di3_doubleword
                (operands[0], operands[1], operands[2]));
        (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
                            (match_operand:QI 2 "nonmemory_operand")))]
   ""
-  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+{
+  ix86_expand_binary_operator (<CODE>, <MODE>mode, operands, TARGET_APX_NDD);
+  DONE;
+})
 
 ;; Avoid useless masking of count operand.
 (define_insn_and_split "*<insn><mode>3_mask"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<insn><mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
        (any_rotate:SWI48
-         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
-         (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
+         (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
+         (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ROTATEX:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "<rotate>{<imodesuffix>}\t%0";
       else
-       return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+       return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+                      : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "rotate,rotatex")
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "rotate,rotatex,rotate")
    (set (attr "preferred_for_size")
      (cond [(eq_attr "alternative" "0")
              (symbol_ref "true")]
    (set_attr "mode" "SI")])
 
+;; SImode rotate whose result is zero-extended to DImode.  Alternative 0
+;; is the two-operand form with operand 1 tied to operand 0, alternative 1
+;; is the bmi2 form (type rotatex), and alternative 2 is the three-operand
+;; form enabled only for apx_ndd.  The insn condition passes TARGET_APX_NDD
+;; to ix86_binary_operator_ok, matching the other shift and rotate patterns
+;; that gained an apx_ndd alternative.
 (define_insn "*<insn>si3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI
-         (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
-                        (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
+         (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
+                        (match_operand:QI 2 "nonmemory_operand" "cI,I,cI"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "TARGET_64BIT
+   && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)"
 {
+  /* The one-operand short form below names only %k0, so it is never
+     chosen for the NDD alternative, which has a separate source.  */
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_ROTATEX:
 
     default:
       if (operands[2] == const1_rtx
-         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+         && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+         && !use_ndd)
        return "<rotate>{l}\t%k0";
       else
-       return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+       return use_ndd ? "<rotate>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+                      : "<rotate>{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "rotate,rotatex")
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "rotate,rotatex,rotate")
    (set (attr "preferred_for_size")
      (cond [(eq_attr "alternative" "0")
              (symbol_ref "true")]
        (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
 
 (define_insn "*<insn><mode>3_1"
-  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
-       (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
-                         (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
+       (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
+                         (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
 {
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<rotate>{<imodesuffix>}\t%0";
   else
-    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return use_ndd
+          ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+          : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "rotate")
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "rotate")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
 
 ;; Rotations through carry flag
+;; SImode rotate right by one through carry: operand 1 shifted right
+;; logically by 1, plus the carry condition (ltu on the CCC flags) shifted
+;; left by 31.  Alternative 0 ties operand 1 to operand 0; alternative 1
+;; takes a separate register or memory source and is enabled for apx_ndd.
 (define_insn "rcrsi2"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (plus:SI
-         (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+         (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
                       (const_int 1))
          (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
                     (const_int 31))))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "rcr{l}\t%0"
-  [(set_attr "type" "ishift1")
+  "@
+   rcr{l}\t%0
+   rcr{l}\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift1")
    (set_attr "memory" "none")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "SI")])
 
+;; DImode counterpart of rcrsi2, available only for TARGET_64BIT: operand 1
+;; shifted right logically by 1, plus the carry condition shifted left by
+;; 63.  Alternative 1 is the separate-source form enabled for apx_ndd.
 (define_insn "rcrdi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (plus:DI
-         (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+         (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,rm")
                       (const_int 1))
          (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
                     (const_int 63))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "rcr{q}\t%0"
-  [(set_attr "type" "ishift1")
+  "@
+   rcr{q}\t%0
+   rcr{q}\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "ishift1")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "DI")])
 
 ;; Versions of sar and shr that set the carry flag.
 (define_insn "<insn><mode>3_carry"
   [(set (reg:CCC FLAGS_REG)
-       (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "register_operand" "0")
+       (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
                                (const_int 1))
                     (const_int 0)] UNSPEC_CC_NE))
-   (set (match_operand:SWI48 0 "register_operand" "=r")
+   (set (match_operand:SWI48 0 "register_operand" "=r,r")
        (any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
   ""
 {
-  if (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+  if ((TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !use_ndd)
     return "<shift>{<imodesuffix>}\t%0";
-  return "<shift>{<imodesuffix>}\t{1, %0|%0, 1}";
+  return use_ndd ? "<shift>{<imodesuffix>}\t{$1, %1, %0|%0, %1, 1}"
+                : "<shift>{<imodesuffix>}\t{$1, %0|%0, 1}";
 }
-  [(set_attr "type" "ishift1")
+  [(set_attr "isa" "*, apx_ndd")
+   (set_attr "type" "ishift1")
    (set (attr "length_immediate")
      (if_then_else
        (ior (match_test "TARGET_SHIFT1")
     FAIL;
 })
 
+;; Eliminate redundant compare between set{z,nz} and j{z,nz}:
+;; setz %al; test %al,%al; jz <...> -> setz %al; jnz <...> and
+;; setnz %al; test %al,%al; jz <...> -> setnz %al; jz <...>.
+;; The condition peep2_regno_dead_p (3, FLAGS_REG) restricts this to
+;; sequences where the flags are dead after the three matched insns.
+(define_peephole2
+  [(set (match_operand:QI 0 "nonimmediate_operand")
+       (match_operator:QI 1 "bt_comparison_operator"
+         [(reg:CCZ FLAGS_REG) (const_int 0)]))
+   (set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (match_dup 0) (const_int 0)))
+   (set (pc)
+       (if_then_else (match_operator 2 "bt_comparison_operator"
+                       [(reg:CCZ FLAGS_REG) (const_int 0)])
+                     (match_operand 3)
+                     (pc)))]
+  "peep2_regno_dead_p (3, FLAGS_REG)"
+  [(set (match_dup 0)
+       (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))
+   (set (pc)
+       (if_then_else (match_dup 2)
+                     (match_dup 3)
+                     (pc)))]
+{
+  /* Operand 0 holds the EQ result, so the deleted test would have seen
+     the opposite of the original flag; branch on the reversed condition.
+     Copy first so PUT_CODE does not modify the matched rtx in place.  */
+  if (GET_CODE (operands[1]) == EQ)
+    {
+      operands[2] = shallow_copy_rtx (operands[2]);
+      PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
+    }
+})
+
 ;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
 ;; subsequent logical operations are used to imitate conditional moves.
 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
                                     (const_int 0))
                  (compare:CC (match_operand 4 "memory_operand")
                              (match_operand 5 "memory_operand"))
-                 (const_int 0)))
+                 (reg:CC FLAGS_REG)))
              (use (match_operand:SI 3 "immediate_operand"))
-             (use (reg:CC FLAGS_REG))
              (clobber (match_operand 0 "register_operand"))
              (clobber (match_operand 1 "register_operand"))
              (clobber (match_dup 2))])]
                             (const_int 0))
          (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
                      (mem:BLK (match_operand:P 5 "register_operand" "1")))
-         (const_int 0)))
+         (reg:CC FLAGS_REG)))
    (use (match_operand:SI 3 "immediate_operand" "i"))
-   (use (reg:CC FLAGS_REG))
    (clobber (match_operand:P 0 "register_operand" "=S"))
    (clobber (match_operand:P 1 "register_operand" "=D"))
    (clobber (match_operand:P 2 "register_operand" "=c"))]
                               (const_int 0))
            (compare:CC (mem:BLK (match_operand 4 "register_operand"))
                        (mem:BLK (match_operand 5 "register_operand")))
-           (const_int 0)))
+           (reg:CC FLAGS_REG)))
      (use (match_operand:SI 3 "immediate_operand"))
-     (use (reg:CC FLAGS_REG))
      (clobber (match_operand 0 "register_operand"))
      (clobber (match_operand 1 "register_operand"))
      (clobber (match_operand 2 "register_operand"))])
                               (const_int 0))
            (compare:CC (mem:BLK (match_dup 4))
                        (mem:BLK (match_dup 5)))
-           (const_int 0)))
+           (reg:CC FLAGS_REG)))
      (use (match_dup 3))
-     (use (reg:CC FLAGS_REG))
      (clobber (match_dup 0))
      (clobber (match_dup 1))
      (clobber (match_dup 2))])])
        (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
 
+;; Integer conditional move on the flags for SWI248 modes.  Alternatives 0
+;; and 1 tie the destination to one arm of the if_then_else; alternatives 2
+;; and 3 (apx_ndd only) take both arms as separate operands.  Templates
+;; that move operand 3 use %c1 rather than %C1.  The insn condition
+;; rejects the case where operands 2 and 3 are both memory.
 (define_insn "*mov<mode>cc_noc"
-  [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+  [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
                               [(reg FLAGS_REG) (const_int 0)])
-         (match_operand:SWI248 2 "nonimmediate_operand" "rm,0")
-         (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))]
+         (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
+         (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))]
   "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "@
    cmov%O2%C1\t{%2, %0|%0, %2}
-   cmov%O2%c1\t{%3, %0|%0, %3}"
-  [(set_attr "type" "icmov")
+   cmov%O2%c1\t{%3, %0|%0, %3}
+   cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
+   cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "icmov")
    (set_attr "mode" "<MODE>")])
 
+;; DImode conditional move whose two arms are each a zero-extended SImode
+;; operand; the templates write %k0.  Requires TARGET_64BIT and
+;; TARGET_CMOVE, and rejects the case where operands 2 and 3 are both
+;; memory.  Alternatives 2 and 3 are the three-operand forms enabled only
+;; for apx_ndd.
 (define_insn "*movsicc_noc_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
                           [(reg FLAGS_REG) (const_int 0)])
          (zero_extend:DI
-           (match_operand:SI 2 "nonimmediate_operand" "rm,0"))
+           (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r"))
          (zero_extend:DI
-           (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
+           (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
   "TARGET_64BIT
    && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "@
    cmov%O2%C1\t{%2, %k0|%k0, %2}
-   cmov%O2%c1\t{%3, %k0|%k0, %3}"
-  [(set_attr "type" "icmov")
+   cmov%O2%c1\t{%3, %k0|%k0, %3}
+   cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
+   cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "icmov")
    (set_attr "mode" "SI")])
 
+;; Variant of *movsicc_noc_zext in which the zero_extend wraps the whole
+;; SImode if_then_else instead of each arm.  The conditions and output
+;; templates are the same; alternatives 2 and 3 are enabled only for
+;; apx_ndd.
 (define_insn "*movsicc_noc_zext_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r")
        (zero_extend:DI
          (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
                             [(reg FLAGS_REG) (const_int 0)])
-            (match_operand:SI 2 "nonimmediate_operand" "rm,0")
-            (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
+            (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r")
+            (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
   "TARGET_64BIT
    && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "@
    cmov%O2%C1\t{%2, %k0|%k0, %2}
-   cmov%O2%c1\t{%3, %k0|%k0, %3}"
-  [(set_attr "type" "icmov")
+   cmov%O2%c1\t{%3, %k0|%k0, %3}
+   cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
+   cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "icmov")
    (set_attr "mode" "SI")])
 
 
 })
 
+;; QImode conditional move between registers, matched only when
+;; TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL.  The output template is "#",
+;; so this insn is never emitted directly and must be split first.
+;; Alternative 2, with neither source tied to the destination, is enabled
+;; only for apx_ndd.
 (define_insn "*movqicc_noc"
-  [(set (match_operand:QI 0 "register_operand" "=r,r")
+  [(set (match_operand:QI 0 "register_operand" "=r,r,r")
        (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
                           [(reg FLAGS_REG) (const_int 0)])
-                     (match_operand:QI 2 "register_operand" "r,0")
-                     (match_operand:QI 3 "register_operand" "0,r")))]
+                     (match_operand:QI 2 "register_operand" "r,0,r")
+                     (match_operand:QI 3 "register_operand" "0,r,r")))]
   "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
   "#"
-  [(set_attr "type" "icmov")
+  [(set_attr "isa" "*,*,apx_ndd")
+   (set_attr "type" "icmov")
    (set_attr "mode" "QI")])
 
 (define_split
   [(set_attr "prefix" "vex")
    (set_attr "type" "other")])
 
+;; Emit ldtilecfg with XImode memory operand 0 as its source.  Modelled as
+;; an unspec_volatile (UNSPECV_LDTILECFG) with no set destination;
+;; available only when TARGET_AMX_TILE.
+(define_insn "ldtilecfg"
+  [(unspec_volatile [(match_operand:XI 0 "memory_operand" "m")]
+            UNSPECV_LDTILECFG)]
+  "TARGET_AMX_TILE"
+  "ldtilecfg\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "memory" "load")
+   (set_attr "mode" "XI")])
+
+;; Emit sttilecfg with XImode memory operand 0 as its destination.  The
+;; stored value is modelled as an unspec_volatile (UNSPECV_STTILECFG) of a
+;; dummy (const_int 0); available only when TARGET_AMX_TILE.
+(define_insn "sttilecfg"
+  [(set (match_operand:XI 0 "memory_operand" "=m")
+        (unspec_volatile:XI [(const_int 0)] UNSPECV_STTILECFG))]
+  "TARGET_AMX_TILE"
+  "sttilecfg\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "memory" "store")
+   (set_attr "mode" "XI")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")