"vml<bhfgq><w>\t%v0,%v1,%v2"
[(set_attr "op_type" "VRR")])
+; vdf, vdg, vdq -- signed integer division (RTL div): operand 0 = operand 1 / operand 2
+(define_insn "div<mode>3"
+ [(set (match_operand:VI_HW_SDT 0 "register_operand" "=v")
+ (div:VI_HW_SDT (match_operand:VI_HW_SDT 1 "register_operand" "v")
+ (match_operand:VI_HW_SDT 2 "register_operand" "v")))]
+ "TARGET_VXE3" ; pattern is only enabled when TARGET_VXE3 holds
+ "vd<bhfgq>\t%v0,%v1,%v2,0" ; NOTE(review): the fourth assembler operand is hard-coded to 0; its meaning is not visible here -- confirm against the ISA reference
+ [(set_attr "op_type" "VRR")])
+
+; vdlf, vdlg, vdlq -- unsigned integer division (RTL udiv): operand 0 = operand 1 / operand 2
+(define_insn "udiv<mode>3"
+ [(set (match_operand:VI_HW_SDT 0 "register_operand" "=v")
+ (udiv:VI_HW_SDT (match_operand:VI_HW_SDT 1 "register_operand" "v")
+ (match_operand:VI_HW_SDT 2 "register_operand" "v")))]
+ "TARGET_VXE3" ; pattern is only enabled when TARGET_VXE3 holds
+ "vdl<bhfgq>\t%v0,%v1,%v2,0" ; NOTE(review): the fourth assembler operand is hard-coded to 0; its meaning is not visible here -- confirm against the ISA reference
+ [(set_attr "op_type" "VRR")])
+
+; vrf, vrg, vrq -- signed integer remainder (RTL mod): operand 0 = operand 1 % operand 2
+(define_insn "mod<mode>3"
+ [(set (match_operand:VI_HW_SDT 0 "register_operand" "=v")
+ (mod:VI_HW_SDT (match_operand:VI_HW_SDT 1 "register_operand" "v")
+ (match_operand:VI_HW_SDT 2 "register_operand" "v")))]
+ "TARGET_VXE3" ; pattern is only enabled when TARGET_VXE3 holds
+ "vr<bhfgq>\t%v0,%v1,%v2,0" ; NOTE(review): the fourth assembler operand is hard-coded to 0; its meaning is not visible here -- confirm against the ISA reference
+ [(set_attr "op_type" "VRR")])
+
+; vrlf, vrlg, vrlq -- unsigned integer remainder (RTL umod): operand 0 = operand 1 % operand 2
+(define_insn "umod<mode>3"
+ [(set (match_operand:VI_HW_SDT 0 "register_operand" "=v")
+ (umod:VI_HW_SDT (match_operand:VI_HW_SDT 1 "register_operand" "v")
+ (match_operand:VI_HW_SDT 2 "register_operand" "v")))]
+ "TARGET_VXE3" ; pattern is only enabled when TARGET_VXE3 holds
+ "vrl<bhfgq>\t%v0,%v1,%v2,0" ; NOTE(review): the fourth assembler operand is hard-coded to 0; its meaning is not visible here -- confirm against the ISA reference
+ [(set_attr "op_type" "VRR")])
+
; vlcb, vlch, vlcf, vlcg
(define_insn "neg<mode>2"
[(set (match_operand:VI 0 "register_operand" "=v")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler {\tvdf\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvdg\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvdq\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+
+typedef int __attribute__ ((vector_size (16))) V4SI; /* 16-byte vector of int.  */
+typedef long long __attribute__ ((vector_size (16))) V2DI; /* 16-byte vector of long long.  */
+typedef __int128 __attribute__ ((vector_size (16))) V1TI; /* 16-byte vector holding a single __int128.  */
+
+V4SI
+vdf (V4SI x, V4SI y)
+{ /* Element-wise signed division of two V4SI vectors; the scan-assembler directives of this test require a vdf instruction in the output.  */
+ return x / y;
+}
+
+V2DI
+vdg (V2DI x, V2DI y)
+{ /* Element-wise signed division of two V2DI vectors; the scan-assembler directives of this test require a vdg instruction in the output.  */
+ return x / y;
+}
+
+V1TI
+vdq (V1TI x, V1TI y)
+{ /* Signed division of the single __int128 element of each V1TI vector; the scan-assembler directives of this test require a vdq instruction in the output.  */
+ return x / y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler {\tvdf\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvdg\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvdq\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+
+typedef int int32; /* Single-identifier aliases: each name is passed as T to the vd() macro below and pasted into the generated function name.  */
+typedef long long int64;
+typedef __int128 int128;
+
+#define vd(T) /* Defines vd_T, which stores x[i] / *y into res[i] for i in [0, 128).  */ \
+void \
+vd_##T (T *res, T *x, T *y) \
+{ \
+ for (int i = 0; i < 128; ++i) \
+ res[i] = x[i] / *y; /* Every element is divided by the same value *y.  */ \
+}
+
+vd(int32) /* Presumably the source of the vdf match required by the dg-final directives.  */
+vd(int64) /* Presumably the source of the vdg match.  */
+vd(int128) /* Presumably the source of the vdq match.  NOTE(review): no compiler options are set in this file; vectorization presumably depends on options supplied by the test driver -- confirm.  */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler {\tvdlf\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvdlg\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvdlq\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+
+typedef unsigned int __attribute__ ((vector_size (16))) UV4SI; /* 16-byte vector of unsigned int.  */
+typedef unsigned long long __attribute__ ((vector_size (16))) UV2DI; /* 16-byte vector of unsigned long long.  */
+typedef unsigned __int128 __attribute__ ((vector_size (16))) UV1TI; /* 16-byte vector holding a single unsigned __int128.  */
+
+UV4SI
+vdlf (UV4SI x, UV4SI y)
+{ /* Element-wise unsigned division of two UV4SI vectors; the scan-assembler directives of this test require a vdlf instruction in the output.  */
+ return x / y;
+}
+
+UV2DI
+vdlg (UV2DI x, UV2DI y)
+{ /* Element-wise unsigned division of two UV2DI vectors; the scan-assembler directives of this test require a vdlg instruction in the output.  */
+ return x / y;
+}
+
+UV1TI
+vdlq (UV1TI x, UV1TI y)
+{ /* Unsigned division of the single unsigned __int128 element of each UV1TI vector; the scan-assembler directives of this test require a vdlq instruction in the output.  */
+ return x / y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler {\tvdlf\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvdlg\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvdlq\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+
+typedef unsigned int uint32; /* Single-identifier aliases: each name is passed as T to the vdl() macro below and pasted into the generated function name.  */
+typedef unsigned long long uint64;
+typedef unsigned __int128 uint128;
+
+#define vdl(T) /* Defines vdl_T, which stores x[i] / *y into res[i] for i in [0, 128).  */ \
+void \
+vdl_##T (T *res, T *x, T *y) \
+{ \
+ for (int i = 0; i < 128; ++i) \
+ res[i] = x[i] / *y; /* Every element is divided by the same value *y.  */ \
+}
+
+vdl(uint32) /* Presumably the source of the vdlf match required by the dg-final directives.  */
+vdl(uint64) /* Presumably the source of the vdlg match.  */
+vdl(uint128) /* Presumably the source of the vdlq match.  NOTE(review): no compiler options are set in this file; vectorization presumably depends on options supplied by the test driver -- confirm.  */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler {\tvrf\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvrg\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvrq\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+
+typedef int __attribute__ ((vector_size (16))) V4SI; /* 16-byte vector of int.  */
+typedef long long __attribute__ ((vector_size (16))) V2DI; /* 16-byte vector of long long.  */
+typedef __int128 __attribute__ ((vector_size (16))) V1TI; /* 16-byte vector holding a single __int128.  */
+
+V4SI
+vrf (V4SI x, V4SI y)
+{ /* Element-wise signed remainder of two V4SI vectors; the scan-assembler directives of this test require a vrf instruction in the output.  */
+ return x % y;
+}
+
+V2DI
+vrg (V2DI x, V2DI y)
+{ /* Element-wise signed remainder of two V2DI vectors; the scan-assembler directives of this test require a vrg instruction in the output.  */
+ return x % y;
+}
+
+V1TI
+vrq (V1TI x, V1TI y)
+{ /* Signed remainder of the single __int128 element of each V1TI vector; the scan-assembler directives of this test require a vrq instruction in the output.  */
+ return x % y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler {\tvrf\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvrg\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvrq\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+
+typedef int int32; /* Single-identifier aliases: each name is passed as T to the vr() macro below and pasted into the generated function name.  */
+typedef long long int64;
+typedef __int128 int128;
+
+#define vr(T) /* Defines vr_T, which stores x[i] % *y into res[i] for i in [0, 128).  */ \
+void \
+vr_##T (T *res, T *x, T *y) \
+{ \
+ for (int i = 0; i < 128; ++i) \
+ res[i] = x[i] % *y; /* Every element is reduced modulo the same value *y.  */ \
+}
+
+vr(int32) /* Presumably the source of the vrf match required by the dg-final directives.  */
+vr(int64) /* Presumably the source of the vrg match.  */
+vr(int128) /* Presumably the source of the vrq match.  NOTE(review): no compiler options are set in this file; vectorization presumably depends on options supplied by the test driver -- confirm.  */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler {\tvrlf\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvrlg\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvrlq\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+
+typedef unsigned int __attribute__ ((vector_size (16))) UV4SI; /* 16-byte vector of unsigned int.  */
+typedef unsigned long long __attribute__ ((vector_size (16))) UV2DI; /* 16-byte vector of unsigned long long.  */
+typedef unsigned __int128 __attribute__ ((vector_size (16))) UV1TI; /* 16-byte vector holding a single unsigned __int128.  */
+
+UV4SI
+vrlf (UV4SI x, UV4SI y)
+{ /* Element-wise unsigned remainder of two UV4SI vectors; the scan-assembler directives of this test require a vrlf instruction in the output.  */
+ return x % y;
+}
+
+UV2DI
+vrlg (UV2DI x, UV2DI y)
+{ /* Element-wise unsigned remainder of two UV2DI vectors; the scan-assembler directives of this test require a vrlg instruction in the output.  */
+ return x % y;
+}
+
+UV1TI
+vrlq (UV1TI x, UV1TI y)
+{ /* Unsigned remainder of the single unsigned __int128 element of each UV1TI vector; the scan-assembler directives of this test require a vrlq instruction in the output.  */
+ return x % y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler {\tvrlf\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvrlg\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+/* { dg-final { scan-assembler {\tvrlq\t%v[0-9]+,%v[0-9]+,%v[0-9]+,0} } } */
+
+typedef unsigned int uint32; /* Single-identifier aliases: each name is passed as T to the vrl() macro below and pasted into the generated function name.  */
+typedef unsigned long long uint64;
+typedef unsigned __int128 uint128;
+
+#define vrl(T) /* Defines vrl_T, which stores x[i] % *y into res[i] for i in [0, 128).  */ \
+void \
+vrl_##T (T *res, T *x, T *y) \
+{ \
+ for (int i = 0; i < 128; ++i) \
+ res[i] = x[i] % *y; /* Every element is reduced modulo the same value *y.  */ \
+}
+
+vrl(uint32) /* Presumably the source of the vrlf match required by the dg-final directives.  */
+vrl(uint64) /* Presumably the source of the vrlg match.  */
+vrl(uint128) /* Presumably the source of the vrlq match.  NOTE(review): no compiler options are set in this file; vectorization presumably depends on options supplied by the test driver -- confirm.  */