For previous architectures, emulate the max/min operations.
gcc/ChangeLog:
* config/s390/s390-builtins.def: Add 128-bit variants and remove
bool variants.
* config/s390/s390-builtin-types.def: Update accordingly.
* config/s390/s390.md: Emulate min/max for GPR.
* config/s390/vector.md: Add min/max patterns and emulate in
case of no VXE3.
gcc/testsuite/ChangeLog:
* gcc.target/s390/vector/vec-max-emu.c: New test.
* gcc.target/s390/vector/vec-min-emu.c: New test.
DEF_FN_TYPE_2 (BT_FN_V2DF_V2DF_UCHAR, BT_V2DF, BT_V2DF, BT_UCHAR)
DEF_FN_TYPE_2 (BT_FN_V2DF_V2DF_V2DF, BT_V2DF, BT_V2DF, BT_V2DF)
DEF_FN_TYPE_2 (BT_FN_V2DF_V2DI_INT, BT_V2DF, BT_V2DI, BT_INT)
-DEF_FN_TYPE_2 (BT_FN_V2DI_BV2DI_V2DI, BT_V2DI, BT_BV2DI, BT_V2DI)
DEF_FN_TYPE_2 (BT_FN_V2DI_UV2DI_UV2DI, BT_V2DI, BT_UV2DI, BT_UV2DI)
DEF_FN_TYPE_2 (BT_FN_V2DI_V2DF_INT, BT_V2DI, BT_V2DF, BT_INT)
DEF_FN_TYPE_2 (BT_FN_V2DI_V2DF_V2DF, BT_V2DI, BT_V2DF, BT_V2DF)
DEF_FN_TYPE_2 (BT_FN_V4SF_UV8HI_UINT, BT_V4SF, BT_UV8HI, BT_UINT)
DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR)
DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF)
-DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI)
DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR)
DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI)
DEF_FN_TYPE_2 (BT_FN_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI)
DEF_FN_TYPE_2 (BT_FN_V4SI_V4SF_V4SF, BT_V4SI, BT_V4SF, BT_V4SF)
DEF_FN_TYPE_2 (BT_FN_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI)
DEF_FN_TYPE_2 (BT_FN_V4SI_V8HI_V8HI, BT_V4SI, BT_V8HI, BT_V8HI)
-DEF_FN_TYPE_2 (BT_FN_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, BT_V8HI)
DEF_FN_TYPE_2 (BT_FN_V8HI_UV8HI_UV8HI, BT_V8HI, BT_UV8HI, BT_UV8HI)
DEF_FN_TYPE_2 (BT_FN_V8HI_V16QI_V16QI, BT_V8HI, BT_V16QI, BT_V16QI)
DEF_FN_TYPE_2 (BT_FN_V8HI_V4SI_V4SI, BT_V8HI, BT_V4SI, BT_V4SI)
OB_DEF_VAR (s390_vec_max_u64_a, s390_vmxlg, B_DEP, 0, BT_OV_UV2DI_BV2DI_UV2DI)
OB_DEF_VAR (s390_vec_max_u64_b, s390_vmxlg, 0, 0, BT_OV_UV2DI_UV2DI_UV2DI)
OB_DEF_VAR (s390_vec_max_u64_c, s390_vmxlg, B_DEP, 0, BT_OV_UV2DI_UV2DI_BV2DI)
+OB_DEF_VAR (s390_vec_max_s128, s390_vmxq, 0, 0, BT_OV_V1TI_V1TI_V1TI) /* NOGEN */
+OB_DEF_VAR (s390_vec_max_u128, s390_vmxlq, 0, 0, BT_OV_UV1TI_UV1TI_UV1TI) /* NOGEN */
OB_DEF_VAR (s390_vec_max_flt, s390_vfmaxsb_4, B_VXE, 0, BT_OV_V4SF_V4SF_V4SF)
OB_DEF_VAR (s390_vec_max_dbl, s390_vfmaxdb_4, 0, 0, BT_OV_V2DF_V2DF_V2DF)
B_DEF (s390_vmxb, smaxv16qi3, 0, B_VX, 0, BT_FN_V16QI_BV16QI_V16QI)
B_DEF (s390_vmxlb, umaxv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
-B_DEF (s390_vmxh, smaxv8hi3, 0, B_VX, 0, BT_FN_V8HI_BV8HI_V8HI)
+B_DEF (s390_vmxh, smaxv8hi3, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI)
B_DEF (s390_vmxlh, umaxv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
-B_DEF (s390_vmxf, smaxv4si3, 0, B_VX, 0, BT_FN_V4SI_BV4SI_V4SI)
+B_DEF (s390_vmxf, smaxv4si3, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI)
B_DEF (s390_vmxlf, umaxv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
-B_DEF (s390_vmxg, smaxv2di3, 0, B_VX, 0, BT_FN_V2DI_BV2DI_V2DI)
+B_DEF (s390_vmxg, smaxv2di3, 0, B_VX, 0, BT_FN_V2DI_V2DI_V2DI)
B_DEF (s390_vmxlg, umaxv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
+B_DEF (s390_vmxq, smaxti3, 0, B_VX, 0, BT_FN_INT128_INT128_INT128)
+B_DEF (s390_vmxlq, umaxti3, 0, B_VX, 0, BT_FN_UINT128_UINT128_UINT128)
B_DEF (s390_vfmaxsb, vfmaxv4sf, 0, B_VXE, O3_U4, BT_FN_V4SF_V4SF_V4SF_INT)
B_DEF (s390_vfmaxdb, vfmaxv2df, 0, B_VXE, O3_U4, BT_FN_V2DF_V2DF_V2DF_INT)
B_DEF (s390_vfmaxsb_4, smaxv4sf3, 0, B_INT | B_VXE, 0, BT_FN_V4SF_V4SF_V4SF)
OB_DEF_VAR (s390_vec_min_u64_a, s390_vmnlg, B_DEP, 0, BT_OV_UV2DI_BV2DI_UV2DI)
OB_DEF_VAR (s390_vec_min_u64_b, s390_vmnlg, 0, 0, BT_OV_UV2DI_UV2DI_UV2DI)
OB_DEF_VAR (s390_vec_min_u64_c, s390_vmnlg, B_DEP, 0, BT_OV_UV2DI_UV2DI_BV2DI)
+OB_DEF_VAR (s390_vec_min_s128, s390_vmnq, 0, 0, BT_OV_V1TI_V1TI_V1TI) /* NOGEN */
+OB_DEF_VAR (s390_vec_min_u128, s390_vmnlq, 0, 0, BT_OV_UV1TI_UV1TI_UV1TI) /* NOGEN */
OB_DEF_VAR (s390_vec_min_flt, s390_vfminsb_4, B_VXE, 0, BT_OV_V4SF_V4SF_V4SF)
OB_DEF_VAR (s390_vec_min_dbl, s390_vfmindb_4, 0, 0, BT_OV_V2DF_V2DF_V2DF)
B_DEF (s390_vmnb, sminv16qi3, 0, B_VX, 0, BT_FN_V16QI_BV16QI_V16QI)
B_DEF (s390_vmnlb, uminv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI)
-B_DEF (s390_vmnh, sminv8hi3, 0, B_VX, 0, BT_FN_V8HI_BV8HI_V8HI)
+B_DEF (s390_vmnh, sminv8hi3, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI)
B_DEF (s390_vmnlh, uminv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI)
-B_DEF (s390_vmnf, sminv4si3, 0, B_VX, 0, BT_FN_V4SI_BV4SI_V4SI)
+B_DEF (s390_vmnf, sminv4si3, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI)
B_DEF (s390_vmnlf, uminv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI)
-B_DEF (s390_vmng, sminv2di3, 0, B_VX, 0, BT_FN_V2DI_BV2DI_V2DI)
+B_DEF (s390_vmng, sminv2di3, 0, B_VX, 0, BT_FN_V2DI_V2DI_V2DI)
B_DEF (s390_vmnlg, uminv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI)
+B_DEF (s390_vmnq, sminti3, 0, B_VX, 0, BT_FN_INT128_INT128_INT128)
+B_DEF (s390_vmnlq, uminti3, 0, B_VX, 0, BT_FN_UINT128_UINT128_UINT128)
B_DEF (s390_vfminsb, vfminv4sf, 0, B_VXE, O3_U4, BT_FN_V4SF_V4SF_V4SF_INT)
B_DEF (s390_vfmindb, vfminv2df, 0, B_VXE, O3_U4, BT_FN_V2DF_V2DF_V2DF_INT)
B_DEF (s390_vfminsb_4, sminv4sf3, 0, B_INT | B_VXE, 0, BT_FN_V4SF_V4SF_V4SF) /* vfminsb */
(set_attr "enabled" "*,<DFDI>")])
+;;
+;; Emulate smin, smax, umin, umax
+;;
+
+; We have to emulate min/max manually for SI/DI modes: if they are not
+; available, expand tries to find an implementation in a wider mode and ends
+; up with the vector implementation for TI mode.
+
+(define_code_iterator MINMAXOP [lt gt ltu gtu])
+(define_code_attr minmaxcc [(lt "CCS") (gt "CCS") (ltu "CCU") (gtu "CCU")])
+(define_code_attr minmaxname [(lt "smin") (gt "smax") (ltu "umin") (gtu "umax")])
+
+(define_expand "<minmaxname><mode>3"
+ [(set (reg:<minmaxcc> CC_REGNUM)
+ (compare:<minmaxcc> (match_operand:GPR 1 "loc_operand")
+ (match_operand:GPR 2 "loc_operand")))
+ (set (match_operand:GPR 0 "nonimmediate_operand")
+ (if_then_else:GPR (MINMAXOP (reg:<minmaxcc> CC_REGNUM) (const_int 0))
+ (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_Z196")
+
+
;;
;;- Negated absolute value instructions
;;
operands[3] = gen_reg_rtx(V16QImode);
})
-; vmnb, vmnh, vmnf, vmng
-(define_insn "smin<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (smin:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
+(define_expand "smin<mode>3"
+ [(set (match_operand:VIT 0 "register_operand" "=v")
+ (smin:VIT (match_operand:VIT 1 "register_operand" "v")
+ (match_operand:VIT 2 "register_operand" "v")))]
+ "TARGET_VX"
+{
+ // V1TI/TI without VXE3: emulate via vec_sel (op1, op2, op2 < op1), signed compare (LT)
+ if ((<MODE>mode == V1TImode || <MODE>mode == TImode) && !TARGET_VXE3)
+ {
+ rtx lt = gen_reg_rtx (<MODE>mode);
+ s390_expand_vec_compare (lt, LT, operands[2], operands[1]);
+ emit_insn (gen_vec_sel0<mode> (operands[0], operands[1], operands[2], lt, GEN_INT (0)));
+ DONE;
+ }
+})
+
+; vmnb, vmnh, vmnf, vmng, vmnq
+(define_insn "*smin<mode>3"
+ [(set (match_operand:VIT_VXE3 0 "register_operand" "=v")
+ (smin:VIT_VXE3 (match_operand:VIT_VXE3 1 "register_operand" "v")
+ (match_operand:VIT_VXE3 2 "register_operand" "v")))]
"TARGET_VX"
"vmn<bhfgq>\t%v0,%v1,%v2"
[(set_attr "op_type" "VRR")])
-; vmxb, vmxh, vmxf, vmxg
-(define_insn "smax<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (smax:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
+(define_expand "smax<mode>3"
+ [(set (match_operand:VIT 0 "register_operand" "=v")
+ (smax:VIT (match_operand:VIT 1 "register_operand" "v")
+ (match_operand:VIT 2 "register_operand" "v")))]
+ "TARGET_VX"
+{
+ // V1TI/TI without VXE3: emulate via vec_sel (op1, op2, op1 < op2), signed compare (LT)
+ if ((<MODE>mode == V1TImode || <MODE>mode == TImode) && !TARGET_VXE3)
+ {
+ rtx lt = gen_reg_rtx (<MODE>mode);
+ s390_expand_vec_compare (lt, LT, operands[1], operands[2]);
+ emit_insn (gen_vec_sel0<mode> (operands[0], operands[1], operands[2], lt, GEN_INT (0)));
+ DONE;
+ }
+})
+
+; vmxb, vmxh, vmxf, vmxg, vmxq
+(define_insn "*smax<mode>3"
+ [(set (match_operand:VIT_VXE3 0 "register_operand" "=v")
+ (smax:VIT_VXE3 (match_operand:VIT_VXE3 1 "register_operand" "v")
+ (match_operand:VIT_VXE3 2 "register_operand" "v")))]
"TARGET_VX"
"vmx<bhfgq>\t%v0,%v1,%v2"
[(set_attr "op_type" "VRR")])
-; vmnlb, vmnlh, vmnlf, vmnlg
-(define_insn "umin<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (umin:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
+(define_expand "umin<mode>3"
+ [(set (match_operand:VIT 0 "register_operand" "=v")
+ (umin:VIT (match_operand:VIT 1 "register_operand" "v")
+ (match_operand:VIT 2 "register_operand" "v")))]
+ "TARGET_VX"
+{
+ // V1TI/TI without VXE3: emulate via vec_sel (op1, op2, op2 < op1), unsigned compare (LTU)
+ if ((<MODE>mode == V1TImode || <MODE>mode == TImode) && !TARGET_VXE3)
+ {
+ rtx ltu = gen_reg_rtx (<MODE>mode);
+ s390_expand_vec_compare (ltu, LTU, operands[2], operands[1]);
+ emit_insn (gen_vec_sel0<mode> (operands[0], operands[1], operands[2], ltu, GEN_INT (0)));
+ DONE;
+ }
+})
+
+; vmnlb, vmnlh, vmnlf, vmnlg, vmnlq
+(define_insn "*umin<mode>3"
+ [(set (match_operand:VIT_VXE3 0 "register_operand" "=v")
+ (umin:VIT_VXE3 (match_operand:VIT_VXE3 1 "register_operand" "v")
+ (match_operand:VIT_VXE3 2 "register_operand" "v")))]
"TARGET_VX"
"vmnl<bhfgq>\t%v0,%v1,%v2"
[(set_attr "op_type" "VRR")])
-; vmxlb, vmxlh, vmxlf, vmxlg
-(define_insn "umax<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (umax:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
+(define_expand "umax<mode>3"
+ [(set (match_operand:VIT 0 "register_operand" "=v")
+ (umax:VIT (match_operand:VIT 1 "register_operand" "v")
+ (match_operand:VIT 2 "register_operand" "v")))]
+ "TARGET_VX"
+{
+ // V1TI/TI without VXE3: emulate via vec_sel (op1, op2, op1 < op2), unsigned compare (LTU)
+ if ((<MODE>mode == V1TImode || <MODE>mode == TImode) && !TARGET_VXE3)
+ {
+ rtx ltu = gen_reg_rtx (<MODE>mode);
+ s390_expand_vec_compare (ltu, LTU, operands[1], operands[2]);
+ emit_insn (gen_vec_sel0<mode> (operands[0], operands[1], operands[2], ltu, GEN_INT (0)));
+ DONE;
+ }
+})
+
+; vmxlb, vmxlh, vmxlf, vmxlg, vmxlq
+(define_insn "*umax<mode>3"
+ [(set (match_operand:VIT_VXE3 0 "register_operand" "=v")
+ (umax:VIT_VXE3 (match_operand:VIT_VXE3 1 "register_operand" "v")
+ (match_operand:VIT_VXE3 2 "register_operand" "v")))]
"TARGET_VX"
"vmxl<bhfgq>\t%v0,%v1,%v2"
[(set_attr "op_type" "VRR")])
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mzarch -march=z13 -save-temps" } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+/* { dg-final { scan-assembler-not {\tvmxq\t} } } */
+
+#include <assert.h>
+
+typedef __attribute__ ((vector_size (16))) signed long long v2di;
+typedef __attribute__ ((vector_size (16))) signed __int128 v1ti;
+
+/*
+** my_max:
+** vchlg %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vceqg %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vpdi %v[0-9]+,%v[0-9]+,%v[0-9]+,4
+** vchg %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vn %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vo %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vrepg %v[0-9]+,%v[0-9]+,1
+** vsel %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+
+** br %r14
+*/
+__attribute__ ((noipa)) v1ti
+my_max (v1ti x, v1ti y)
+{
+ return __builtin_s390_vec_max (x, y);
+}
+
+int
+main (void)
+{
+ v2di x, y, z;
+
+ x = (v2di){ -1, -42 };
+ y = (v2di){ 0, 42 };
+ z = (v2di) my_max ((v1ti) x, (v1ti) y);
+ assert (z[0] == 0 && z[1] == 42);
+ z = (v2di) my_max ((v1ti) y, (v1ti) x);
+ assert (z[0] == 0 && z[1] == 42);
+
+ x = (v2di){ 42, 42 };
+ y = (v2di){ 42, 43 };
+ z = (v2di) my_max ((v1ti) x, (v1ti) y);
+ assert (z[0] == 42 && z[1] == 43);
+ z = (v2di) my_max ((v1ti) y, (v1ti) x);
+ assert (z[0] == 42 && z[1] == 43);
+
+ x = (v2di){ 42, 42 };
+ y = (v2di){ 43, 42 };
+ z = (v2di) my_max ((v1ti) x, (v1ti) y);
+ assert (z[0] == 43 && z[1] == 42);
+ z = (v2di) my_max ((v1ti) y, (v1ti) x);
+ assert (z[0] == 43 && z[1] == 42);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -mzarch -march=z13 -save-temps" } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+/* { dg-final { scan-assembler-not {\tvmnq\t} } } */
+
+#include <assert.h>
+
+typedef __attribute__ ((vector_size (16))) signed long long v2di;
+typedef __attribute__ ((vector_size (16))) signed __int128 v1ti;
+
+/*
+** my_min:
+** vchlg %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vceqg %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vpdi %v[0-9]+,%v[0-9]+,%v[0-9]+,4
+** vchg %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vn %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vo %v[0-9]+,%v[0-9]+,%v[0-9]+
+** vrepg %v[0-9]+,%v[0-9]+,1
+** vsel %v[0-9]+,%v[0-9]+,%v[0-9]+,%v[0-9]+
+** br %r14
+*/
+__attribute__ ((noipa)) v1ti
+my_min (v1ti x, v1ti y)
+{
+ return __builtin_s390_vec_min (x, y);
+}
+
+int
+main (void)
+{
+ v2di x, y, z;
+
+ x = (v2di){ -1, -42 };
+ y = (v2di){ 0, 42 };
+ z = (v2di) my_min ((v1ti) x, (v1ti) y);
+ assert (z[0] == -1 && z[1] == -42);
+ z = (v2di) my_min ((v1ti) y, (v1ti) x);
+ assert (z[0] == -1 && z[1] == -42);
+
+ x = (v2di){ 42, 42 };
+ y = (v2di){ 42, 43 };
+ z = (v2di) my_min ((v1ti) x, (v1ti) y);
+ assert (z[0] == 42 && z[1] == 42);
+ z = (v2di) my_min ((v1ti) y, (v1ti) x);
+ assert (z[0] == 42 && z[1] == 42);
+
+ x = (v2di){ 42, 42 };
+ y = (v2di){ 43, 42 };
+ z = (v2di) my_min ((v1ti) x, (v1ti) y);
+ assert (z[0] == 42 && z[1] == 42);
+ z = (v2di) my_min ((v1ti) y, (v1ti) x);
+ assert (z[0] == 42 && z[1] == 42);
+
+ return 0;
+}