git.ipfire.org Git - thirdparty/gcc.git/commitdiff
[ARC] Add SIMD extensions for ARC HS
author Claudiu Zissulescu <claziss@synopsys.com>
Thu, 28 Apr 2016 09:53:13 +0000 (11:53 +0200)
committer Claudiu Zissulescu <claziss@gcc.gnu.org>
Thu, 28 Apr 2016 09:53:13 +0000 (11:53 +0200)
gcc/
2016-04-28  Claudiu Zissulescu  <claziss@synopsys.com>

* config/arc/arc.c (arc_vector_mode_supported_p): Add support for
the new ARC HS SIMD instructions.
(arc_preferred_simd_mode): New function.
(arc_autovectorize_vector_sizes): Likewise.
(TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
(arc_init_reg_tables): Accept new ARC HS SIMD modes.
(arc_init_builtins): Add new SIMD builtin types.
(arc_split_move): Handle 64 bit vector moves.
* config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD)
(TARGET_PLUS_QMACW): Define.
* config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH)
(DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H)
(VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H)
(VSUBADD4H): New builtins.
* config/arc/simdext.md: Add new ARC HS SIMD instructions.
* testsuite/gcc.target/arc/builtin_simdarc.c: New file.

From-SVN: r235551

gcc/ChangeLog
gcc/config/arc/arc.c
gcc/config/arc/arc.h
gcc/config/arc/builtins.def
gcc/config/arc/simdext.md
gcc/testsuite/gcc.target/arc/builtin_simdarc.c [new file with mode: 0644]

index a41b8e0841cd92902e1b850cc803303e2421785c..f557c9ff453748495447d46a12370a3538c38bff 100644 (file)
@@ -1,3 +1,23 @@
+2016-04-28  Claudiu Zissulescu  <claziss@synopsys.com>
+
+       * config/arc/arc.c (arc_vector_mode_supported_p): Add support for
+       the new ARC HS SIMD instructions.
+       (arc_preferred_simd_mode): New function.
+       (arc_autovectorize_vector_sizes): Likewise.
+       (TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
+       (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
+       (arc_init_reg_tables): Accept new ARC HS SIMD modes.
+       (arc_init_builtins): Add new SIMD builtin types.
+       (arc_split_move): Handle 64 bit vector moves.
+       * config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD)
+       (TARGET_PLUS_QMACW): Define.
+       * config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH)
+       (DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H)
+       (VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H)
+       (VSUBADD4H): New builtins.
+       * config/arc/simdext.md: Add new ARC HS SIMD instructions.
+       * testsuite/gcc.target/arc/builtin_simdarc.c: New file.
+
 2016-04-28  Eduard Sanou  <dhole@openmailbox.org>
            Matthias Klose  <doko@debian.org>
 
index d60db502ef85eb07cca0d582a21265584e51b714..d120946a5f2fa78ecf8df464d108ffa8adb0a536 100644 (file)
@@ -247,16 +247,47 @@ static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
 static bool
 arc_vector_mode_supported_p (machine_mode mode)
 {
-  if (!TARGET_SIMD_SET)
-    return false;
+  switch (mode)
+    {
+    case V2HImode:
+      return TARGET_PLUS_DMPY;
+    case V4HImode:
+    case V2SImode:
+      return TARGET_PLUS_QMACW;
+    case V4SImode:
+    case V8HImode:
+      return TARGET_SIMD_SET;
 
-  if ((mode == V4SImode)
-      || (mode == V8HImode))
-    return true;
+    default:
+      return false;
+    }
+}
 
-  return false;
+/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
+
+static enum machine_mode
+arc_preferred_simd_mode (enum machine_mode mode)
+{
+  switch (mode)
+    {
+    case HImode:
+      return TARGET_PLUS_QMACW ? V4HImode : V2HImode;
+    case SImode:
+      return V2SImode;
+
+    default:
+      return word_mode;
+    }
 }
 
+/* Implements target hook
+   TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES.  */
+
+static unsigned int
+arc_autovectorize_vector_sizes (void)
+{
+  return TARGET_PLUS_QMACW ? (8 | 4) : 0;
+}
 
 /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
 static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
@@ -345,6 +376,12 @@ static void arc_finalize_pic (void);
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
 
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes
+
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
 
@@ -1214,7 +1251,12 @@ arc_init_reg_tables (void)
            arc_mode_class[i] = 0;
          break;
        case MODE_VECTOR_INT:
-         arc_mode_class [i] = (1<< (int) V_MODE);
+         if (GET_MODE_SIZE (m) == 4)
+           arc_mode_class[i] = (1 << (int) S_MODE);
+         else if (GET_MODE_SIZE (m) == 8)
+           arc_mode_class[i] = (1 << (int) D_MODE);
+         else
+           arc_mode_class[i] = (1 << (int) V_MODE);
          break;
        case MODE_CC:
        default:
@@ -5277,6 +5319,15 @@ arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
 static void
 arc_init_builtins (void)
 {
+  tree V4HI_type_node;
+  tree V2SI_type_node;
+  tree V2HI_type_node;
+
+  /* Vector types based on HS SIMD elements.  */
+  V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+  V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+  V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
+
   tree pcvoid_type_node
     = build_pointer_type (build_qualified_type (void_type_node,
                                                TYPE_QUAL_CONST));
@@ -5341,6 +5392,28 @@ arc_init_builtins (void)
   tree v8hi_ftype_v8hi
     = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                NULL_TREE);
+  /* ARCv2 SIMD types.  */
+  tree long_ftype_v4hi_v4hi
+    = build_function_type_list (long_long_integer_type_node,
+                               V4HI_type_node, V4HI_type_node, NULL_TREE);
+  tree int_ftype_v2hi_v2hi
+    = build_function_type_list (integer_type_node,
+                               V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2si_ftype_v2hi_v2hi
+    = build_function_type_list (V2SI_type_node,
+                               V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2hi_ftype_v2hi_v2hi
+    = build_function_type_list (V2HI_type_node,
+                               V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2si_ftype_v2si_v2si
+    = build_function_type_list (V2SI_type_node,
+                               V2SI_type_node, V2SI_type_node, NULL_TREE);
+  tree v4hi_ftype_v4hi_v4hi
+    = build_function_type_list (V4HI_type_node,
+                               V4HI_type_node, V4HI_type_node, NULL_TREE);
+  tree long_ftype_v2si_v2hi
+    = build_function_type_list (long_long_integer_type_node,
+                               V2SI_type_node, V2HI_type_node, NULL_TREE);
 
   /* Add the builtins.  */
 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)                   \
@@ -8706,6 +8779,31 @@ arc_split_move (rtx *operands)
       return;
     }
 
+  if (TARGET_PLUS_QMACW
+      && GET_CODE (operands[1]) == CONST_VECTOR)
+    {
+      HOST_WIDE_INT intval0, intval1;
+      if (GET_MODE (operands[1]) == V2SImode)
+       {
+         intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
+         intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
+       }
+      else
+       {
+         intval1  = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
+         intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
+         intval0  = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
+         intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
+       }
+      xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+      xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+      xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
+      xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
+      emit_move_insn (xop[0], xop[2]);
+      emit_move_insn (xop[3], xop[1]);
+      return;
+    }
+
   for (i = 0; i < 2; i++)
     {
       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
index 1c2a38d4acfc10ffde19cd4f8a1d08fd802234bf..5100a5b8f821f3e5a6dfa5f986790ddc3ac60674 100644 (file)
@@ -1724,6 +1724,12 @@ enum
 /* Any multiplication feature macro.  */
 #define TARGET_ANY_MPY                                         \
   (TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET)
+/* PLUS_DMPY feature macro.  */
+#define TARGET_PLUS_DMPY  ((arc_mpy_option > 6) && TARGET_HS)
+/* PLUS_MACD feature macro.  */
+#define TARGET_PLUS_MACD  ((arc_mpy_option > 7) && TARGET_HS)
+/* PLUS_QMACW feature macro.  */
+#define TARGET_PLUS_QMACW ((arc_mpy_option > 8) && TARGET_HS)
 
 /* ARC600 and ARC601 feature macro.  */
 #define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601)
index 19be1d218520b19993f07bbf08077c5c9412f381..8c71d30a459a74fe5e7d2c4c98a4f6a942616997 100644 (file)
@@ -193,3 +193,30 @@ DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, TARGET_SIMD_SET)
 
 /* END SIMD marker.  */
 DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0)
+
+/* ARCv2 SIMD instructions that use/clobber the accumulator reg.  */
+DEF_BUILTIN (QMACH,      2, long_ftype_v4hi_v4hi,   qmach,      TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMACHU,     2, long_ftype_v4hi_v4hi,   qmachu,     TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMPYH,      2, long_ftype_v4hi_v4hi,   qmpyh,      TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMPYHU,     2, long_ftype_v4hi_v4hi,   qmpyhu,     TARGET_PLUS_QMACW)
+
+DEF_BUILTIN (DMACH,      2, int_ftype_v2hi_v2hi,    dmach,      TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMACHU,     2, int_ftype_v2hi_v2hi,    dmachu,     TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMPYH,      2, int_ftype_v2hi_v2hi,    dmpyh,      TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMPYHU,     2, int_ftype_v2hi_v2hi,    dmpyhu,     TARGET_PLUS_DMPY)
+
+DEF_BUILTIN (DMACWH,     2, long_ftype_v2si_v2hi,   dmacwh,     TARGET_PLUS_QMACW)
+DEF_BUILTIN (DMACWHU,    2, long_ftype_v2si_v2hi,   dmacwhu,    TARGET_PLUS_QMACW)
+
+DEF_BUILTIN (VMAC2H,     2, v2si_ftype_v2hi_v2hi,   vmac2h,     TARGET_PLUS_MACD)
+DEF_BUILTIN (VMAC2HU,    2, v2si_ftype_v2hi_v2hi,   vmac2hu,    TARGET_PLUS_MACD)
+DEF_BUILTIN (VMPY2H,     2, v2si_ftype_v2hi_v2hi,   vmpy2h,     TARGET_PLUS_MACD)
+DEF_BUILTIN (VMPY2HU,    2, v2si_ftype_v2hi_v2hi,   vmpy2hu,    TARGET_PLUS_MACD)
+
+/* Combined add/sub HS SIMD instructions.  */
+DEF_BUILTIN (VADDSUB2H,  2, v2hi_ftype_v2hi_v2hi,   addsubv2hi3, TARGET_PLUS_DMPY)
+DEF_BUILTIN (VSUBADD2H,  2, v2hi_ftype_v2hi_v2hi,   subaddv2hi3, TARGET_PLUS_DMPY)
+DEF_BUILTIN (VADDSUB,    2, v2si_ftype_v2si_v2si,   addsubv2si3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VSUBADD,    2, v2si_ftype_v2si_v2si,   subaddv2si3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VADDSUB4H,  2, v4hi_ftype_v4hi_v4hi,   addsubv4hi3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VSUBADD4H,  2, v4hi_ftype_v4hi_v4hi,   subaddv4hi3, TARGET_PLUS_QMACW)
index 9fd9d62e048341ccc673dbba1c9613f12636ff77..51869e367726522dc70b2191df1f9228455239cd 100644 (file)
   [(set_attr "type" "simd_vcontrol")
    (set_attr "length" "4")
    (set_attr "cond" "nocond")])
+
+;; New ARCv2 SIMD extensions
+
+;;64-bit vectors of halfwords and words
+(define_mode_iterator VWH [V4HI V2SI])
+
+;;double element vectors
+(define_mode_iterator VDV [V2HI V2SI])
+(define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")])
+(define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")])
+
+;;all vectors
+(define_mode_iterator VCT [V2HI V4HI V2SI])
+(define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")])
+
+;; Widening operations.
+(define_code_iterator SE [sign_extend zero_extend])
+(define_code_attr V_US [(sign_extend "s") (zero_extend "u")])
+(define_code_attr V_US_suffix [(sign_extend "") (zero_extend "u")])
+
+
+;; Move patterns
+(define_expand "movv2hi"
+  [(set (match_operand:V2HI 0 "move_dest_operand" "")
+       (match_operand:V2HI 1 "general_operand" ""))]
+  ""
+  "{
+    if (prepare_move_operands (operands, V2HImode))
+         DONE;
+   }")
+
+(define_insn_and_split "*movv2hi_insn"
+  [(set (match_operand:V2HI 0 "nonimmediate_operand" "=r,r,r,m")
+       (match_operand:V2HI 1 "general_operand"       "i,r,m,r"))]
+  "(register_operand (operands[0], V2HImode)
+    || register_operand (operands[1], V2HImode))"
+  "@
+   #
+   mov%? %0, %1
+   ld%U1%V1 %0,%1
+   st%U0%V0 %1,%0"
+  "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR"
+  [(set (match_dup 0) (match_dup 2))]
+  {
+   HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
+   intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
+
+   operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+   operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode));
+  }
+  [(set_attr "type" "move,move,load,store")
+   (set_attr "predicable" "yes,yes,no,no")
+   (set_attr "iscompact"  "false,false,false,false")
+   ])
+
+(define_expand "movmisalignv2hi"
+ [(set (match_operand:V2HI 0 "general_operand" "")
+       (match_operand:V2HI 1 "general_operand" ""))]
+ ""
+{
+ if (!register_operand (operands[0], V2HImode)
+      && !register_operand (operands[1], V2HImode))
+    operands[1] = force_reg (V2HImode, operands[1]);
+})
+
+(define_expand "mov<mode>"
+  [(set (match_operand:VWH 0 "move_dest_operand" "")
+       (match_operand:VWH 1 "general_operand" ""))]
+  ""
+  "{
+    if (GET_CODE (operands[0]) == MEM)
+     operands[1] = force_reg (<MODE>mode, operands[1]);
+   }")
+
+(define_insn_and_split "*mov<mode>_insn"
+  [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
+       (match_operand:VWH 1 "general_operand"    "i,r,m,r"))]
+  "TARGET_PLUS_QMACW
+   && (register_operand (operands[0], <MODE>mode)
+       || register_operand (operands[1], <MODE>mode))"
+  "*
+{
+  switch (which_alternative)
+    {
+     default:
+       return \"#\";
+
+     case 1:
+       return \"vadd2 %0, %1, 0\";
+
+     case 2:
+       if (TARGET_LL64)
+         return \"ldd%U1%V1 %0,%1\";
+       return \"#\";
+
+     case 3:
+       if (TARGET_LL64)
+          return \"std%U0%V0 %1,%0\";
+        return \"#\";
+    }
+}"
+  "reload_completed"
+  [(const_int 0)]
+  {
+   arc_split_move (operands);
+   DONE;
+  }
+  [(set_attr "type" "move,move,load,store")
+   (set_attr "predicable" "yes,no,no,no")
+   (set_attr "iscompact"  "false,false,false,false")
+   ])
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:VWH 0 "general_operand" "")
+       (match_operand:VWH 1 "general_operand" ""))]
+ ""
+{
+ if (!register_operand (operands[0], <MODE>mode)
+      && !register_operand (operands[1], <MODE>mode))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "bswapv2hi2"
+  [(set (match_operand:V2HI 0 "register_operand" "=r,r")
+        (bswap:V2HI (match_operand:V2HI 1 "nonmemory_operand" "r,i")))]
+  "TARGET_V2 && TARGET_SWAP"
+  "swape %0, %1"
+  [(set_attr "length" "4,8")
+   (set_attr "type" "two_cycle_core")])
+
+;; Simple arithmetic insns
+(define_insn "add<mode>3"
+  [(set (match_operand:VCT 0 "register_operand"          "=r,r")
+       (plus:VCT (match_operand:VCT 1 "register_operand" "0,r")
+                 (match_operand:VCT 2 "register_operand" "r,r")))]
+  "TARGET_PLUS_DMPY"
+  "vadd<V_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "sub<mode>3"
+  [(set (match_operand:VCT 0 "register_operand"           "=r,r")
+       (minus:VCT (match_operand:VCT 1 "register_operand" "0,r")
+                  (match_operand:VCT 2 "register_operand" "r,r")))]
+  "TARGET_PLUS_DMPY"
+  "vsub<V_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Combined arithmetic ops
+(define_insn "addsub<mode>3"
+  [(set (match_operand:VDV 0 "register_operand" "=r,r")
+       (vec_concat:VDV
+        (plus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
+                                                (parallel [(const_int 0)]))
+                         (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
+                                                (parallel [(const_int 0)])))
+        (minus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
+                          (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_PLUS_DMPY"
+  "vaddsub<V_addsub_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "subadd<mode>3"
+  [(set (match_operand:VDV 0 "register_operand" "=r,r")
+       (vec_concat:VDV
+        (minus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
+                                                 (parallel [(const_int 0)]))
+                          (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
+                                                 (parallel [(const_int 0)])))
+        (plus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
+                         (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_PLUS_DMPY"
+  "vsubadd<V_addsub_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "addsubv4hi3"
+  [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
+       (vec_concat:V4HI
+        (vec_concat:V2HI
+         (plus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
+                                 (parallel [(const_int 0)]))
+                  (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
+                                 (parallel [(const_int 0)])))
+         (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+                   (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
+        (vec_concat:V2HI
+         (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
+         (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
+                   (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+        ))]
+  "TARGET_PLUS_QMACW"
+  "vaddsub4h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "subaddv4hi3"
+  [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
+       (vec_concat:V4HI
+        (vec_concat:V2HI
+         (minus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
+                                  (parallel [(const_int 0)]))
+                   (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
+                                 (parallel [(const_int 0)])))
+         (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
+        (vec_concat:V2HI
+         (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+                   (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
+         (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
+                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+        ))]
+  "TARGET_PLUS_QMACW"
+  "vsubadd4h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Multiplication
+(define_insn "dmpyh<V_US_suffix>"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (plus:SI
+        (mult:SI
+         (SE:SI
+          (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,r")
+                         (parallel [(const_int 0)])))
+         (SE:SI
+          (vec_select:HI (match_operand:V2HI 2 "register_operand" "r,r")
+                         (parallel [(const_int 0)]))))
+        (mult:SI
+         (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+         (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))))
+   (set (reg:DI ARCV2_ACC)
+       (zero_extend:DI
+        (plus:SI
+         (mult:SI
+          (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+          (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 0)]))))
+         (mult:SI
+          (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+          (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))))]
+  "TARGET_PLUS_DMPY"
+  "dmpy<V_US_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; We can use dmac as well here.  It remains to be investigated which
+;; version is more beneficial.
+(define_expand "sdot_prodv2hi"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:SI 3 "register_operand" "")]
+  "TARGET_PLUS_DMPY"
+{
+ rtx t = gen_reg_rtx (SImode);
+ emit_insn (gen_dmpyh (t, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[3], t));
+ DONE;
+})
+
+(define_expand "udot_prodv2hi"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:SI 3 "register_operand" "")]
+  "TARGET_PLUS_DMPY"
+{
+ rtx t = gen_reg_rtx (SImode);
+ emit_insn (gen_dmpyhu (t, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[3], t));
+ DONE;
+})
+
+(define_insn "arc_vec_<V_US>mult_lo_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+                           (match_operand:V4HI 1 "even_register_operand" "0,r")
+                           (parallel [(const_int 0) (const_int 1)])))
+                 (SE:V2SI (vec_select:V2HI
+                           (match_operand:V4HI 2 "even_register_operand" "r,r")
+                           (parallel [(const_int 0) (const_int 1)])))))
+  (set (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+                                           (parallel [(const_int 0) (const_int 1)])))
+                 (SE:V2SI (vec_select:V2HI (match_dup 2)
+                                           (parallel [(const_int 0) (const_int 1)])))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmpy2h<V_US_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "arc_vec_<V_US>multacc_lo_v4hi"
+  [(set (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+                            (match_operand:V4HI 0 "even_register_operand" "r")
+                            (parallel [(const_int 0) (const_int 1)])))
+                  (SE:V2SI (vec_select:V2HI
+                            (match_operand:V4HI 1 "even_register_operand" "r")
+                            (parallel [(const_int 0) (const_int 1)])))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmpy2h<V_US_suffix>%? 0, %0, %1"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "no")
+   (set_attr "cond" "nocond")])
+
+(define_expand "vec_widen_<V_US>mult_lo_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                 "")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+                           (match_operand:V4HI 1 "even_register_operand" "")
+                           (parallel [(const_int 0) (const_int 1)])))
+                 (SE:V2SI (vec_select:V2HI
+                           (match_operand:V4HI 2 "even_register_operand" "")
+                           (parallel [(const_int 0) (const_int 1)])))))]
+  "TARGET_PLUS_QMACW"
+  {
+     emit_insn (gen_arc_vec_<V_US>mult_lo_v4hi (operands[0],
+                                               operands[1],
+                                               operands[2]));
+     DONE;
+  }
+)
+
+(define_insn "arc_vec_<V_US>mult_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+                           (match_operand:V4HI 1 "even_register_operand" "0,r")
+                           (parallel [(const_int 2) (const_int 3)])))
+                 (SE:V2SI (vec_select:V2HI
+                           (match_operand:V4HI 2 "even_register_operand" "r,r")
+                           (parallel [(const_int 2) (const_int 3)])))))
+  (set (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+                                           (parallel [(const_int 2) (const_int 3)])))
+                 (SE:V2SI (vec_select:V2HI (match_dup 2)
+                                           (parallel [(const_int 2) (const_int 3)])))))
+  ]
+  "TARGET_PLUS_QMACW"
+  "vmpy2h<V_US_suffix>%? %0, %R1, %R2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_expand "vec_widen_<V_US>mult_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                               "")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+                                    (match_operand:V4HI 1 "even_register_operand" "")
+                                    (parallel [(const_int 2) (const_int 3)])))
+                 (SE:V2SI (vec_select:V2HI
+                                    (match_operand:V4HI 2 "even_register_operand" "")
+                                    (parallel [(const_int 2) (const_int 3)])))))]
+  "TARGET_PLUS_MACD"
+  {
+     emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0],
+                                               operands[1],
+                                               operands[2]));
+     DONE;
+  }
+)
+
+(define_insn "arc_vec_<V_US>mac_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (plus:V2SI
+       (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+                            (match_operand:V4HI 1 "even_register_operand" "0,r")
+                            (parallel [(const_int 2) (const_int 3)])))
+                  (SE:V2SI (vec_select:V2HI
+                            (match_operand:V4HI 2 "even_register_operand" "r,r")
+                            (parallel [(const_int 2) (const_int 3)]))))))
+  (set (reg:V2SI ARCV2_ACC)
+       (plus:V2SI
+       (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+                                            (parallel [(const_int 2) (const_int 3)])))
+                  (SE:V2SI (vec_select:V2HI (match_dup 2)
+                                            (parallel [(const_int 2) (const_int 3)]))))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmac2h<V_US_suffix>%? %0, %R1, %R2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Builtins
+(define_insn "dmach"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
+                   (match_operand:V2HI 2 "register_operand" "r,r")
+                   (reg:DI ARCV2_ACC)]
+                  UNSPEC_ARC_DMACH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_DMPY"
+  "dmach%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmachu"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
+                   (match_operand:V2HI 2 "register_operand" "r,r")
+                   (reg:DI ARCV2_ACC)]
+                  UNSPEC_ARC_DMACHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_DMPY"
+  "dmachu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmacwh"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+       (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
+                   (match_operand:V2HI 2 "register_operand"      "r,r")
+                   (reg:DI ARCV2_ACC)]
+                  UNSPEC_ARC_DMACWH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "dmacwh%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmacwhu"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+       (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
+                   (match_operand:V2HI 2 "register_operand"      "r,r")
+                   (reg:DI ARCV2_ACC)]
+                  UNSPEC_ARC_DMACWHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "dmacwhu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmac2h"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+       (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+                     (match_operand:V2HI 2 "register_operand" "r,r")
+                     (reg:DI ARCV2_ACC)]
+                    UNSPEC_ARC_VMAC2H))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmac2h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmac2hu"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+       (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+                     (match_operand:V2HI 2 "register_operand" "r,r")
+                     (reg:DI ARCV2_ACC)]
+                  UNSPEC_ARC_VMAC2HU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmac2hu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmpy2h"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+       (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+                     (match_operand:V2HI 2 "register_operand" "r,r")]
+                    UNSPEC_ARC_VMPY2H))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmpy2h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmpy2hu"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+       (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+                     (match_operand:V2HI 2 "register_operand" "r,r")]
+                    UNSPEC_ARC_VMPY2HU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmpy2hu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmach"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+       (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+                   (match_operand:V4HI 2 "even_register_operand" "r,r")
+                   (reg:DI ARCV2_ACC)]
+                    UNSPEC_ARC_QMACH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmach%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmachu"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+       (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+                   (match_operand:V4HI 2 "even_register_operand" "r,r")
+                   (reg:DI ARCV2_ACC)]
+                  UNSPEC_ARC_QMACHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmachu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmpyh"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+       (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+                   (match_operand:V4HI 2 "even_register_operand" "r,r")]
+                    UNSPEC_ARC_QMPYH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmpyh%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmpyhu"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+       (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+                   (match_operand:V4HI 2 "even_register_operand" "r,r")]
+                  UNSPEC_ARC_QMPYHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmpyhu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
diff --git a/gcc/testsuite/gcc.target/arc/builtin_simdarc.c b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c
new file mode 100644 (file)
index 0000000..68aae40
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=archs -O2 -Werror-implicit-function-declaration -mmpy-option=9" } */
+
+#define STEST(name, rettype, op1type, op2type) \
+  rettype test_ ## name                                \
+  (op1type a, op2type b)                       \
+  {                                            \
+    return __builtin_arc_ ## name (a, b);      \
+  }
+
+typedef short v2hi __attribute__ ((vector_size (4)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int   v2si __attribute__ ((vector_size (8)));
+
+STEST (qmach,  long long, v4hi, v4hi)
+STEST (qmachu, long long, v4hi, v4hi)
+STEST (qmpyh,  long long, v4hi, v4hi)
+STEST (qmpyhu, long long, v4hi, v4hi)
+
+STEST (dmach,  int, v2hi, v2hi)
+STEST (dmachu, int, v2hi, v2hi)
+STEST (dmpyh,  int, v2hi, v2hi)
+STEST (dmpyhu, int, v2hi, v2hi)
+
+STEST (dmacwh,  long, v2si, v2hi)
+STEST (dmacwhu, long, v2si, v2hi)
+
+STEST (vmac2h,  v2si, v2hi, v2hi)
+STEST (vmac2hu, v2si, v2hi, v2hi)
+STEST (vmpy2h,  v2si, v2hi, v2hi)
+STEST (vmpy2hu, v2si, v2hi, v2hi)
+
+STEST (vaddsub2h, v2hi, v2hi, v2hi)
+STEST (vsubadd2h, v2hi, v2hi, v2hi)
+STEST (vaddsub,   v2si, v2si, v2si)
+STEST (vsubadd,   v2si, v2si, v2si)
+STEST (vaddsub4h, v4hi, v4hi, v4hi)
+STEST (vsubadd4h, v4hi, v4hi, v4hi)