s390: Implement reduction optabs

author Juergen Christ <jchrist@linux.ibm.com>

Tue, 8 Jul 2025 17:02:42 +0000 (19:02 +0200)

committer Juergen Christ <jchrist@linux.ibm.com>

Mon, 14 Jul 2025 15:16:53 +0000 (17:16 +0200)
author Juergen Christ <jchrist@linux.ibm.com>
Tue, 8 Jul 2025 17:02:42 +0000 (19:02 +0200)
committer Juergen Christ <jchrist@linux.ibm.com>
Mon, 14 Jul 2025 15:16:53 +0000 (17:16 +0200)
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md

index 26753c099cdadefaec095e1b2605a266749953a5..c63360f8bcc7a385aa8faea080f218236ed52048 100644 (file)
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3572,11 +3572,6 @@
    "veval\t%v0,%v1,%v2,%v3,%b4"
    [(set_attr "op_type" "VRI")])
  
-; reduc_smin
-; reduc_smax
-; reduc_umin
-; reduc_umax
-
  ; vec_pack_sfix_trunc: convert + pack ?
  ; vec_pack_ufix_trunc
  ; vec_unpacks_float_hi
@@ -3627,3 +3622,291 @@
                (const_int 4)]
               UNSPEC_FMIN))]
    "TARGET_VXE")
+
+; reduc_plus
+(define_expand "reduc_plus_scal_<mode>"
+  [(set (match_dup 4)
+       (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand")
+                     (match_dup 2)]
+                     UNSPEC_VEC_VSUM))
+   (set (match_dup 5)
+       (unspec:V2DI [(match_dup 4) (match_dup 3)] UNSPEC_VEC_VSUMQ))
+   (set (match_operand:<non_vec> 0 "register_operand")
+       (vec_select:<non_vec> (match_dup 6)
+                             (parallel [(match_dup 7)])))]
+  "TARGET_VX"
+{
+  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  operands[3] = simplify_gen_subreg (V4SImode, operands[2], <MODE>mode, 0);
+  operands[4] = gen_reg_rtx (V4SImode);
+  operands[5] = gen_reg_rtx (V2DImode);
+  operands[6] = simplify_gen_subreg (<MODE>mode, operands[5], V2DImode, 0);
+  operands[7] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+(define_expand "reduc_plus_scal_<mode>"
+  [(set (match_dup 3)
+       (unspec:V2DI [(match_operand:VI_HW_SD 1 "register_operand")
+                     (match_dup 2)]
+                     UNSPEC_VEC_VSUMQ))
+   (set (match_operand:<non_vec> 0 "register_operand")
+       (vec_select:<non_vec> (match_dup 4)
+                             (parallel [(match_dup 5)])))]
+  "TARGET_VX"
+{
+  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+  operands[3] = gen_reg_rtx (V2DImode);
+  operands[4] = simplify_gen_subreg (<MODE>mode, operands[3], V2DImode, 0);
+  operands[5] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+(define_expand "reduc_plus_scal_v2df"
+  [(set (match_dup 2)
+       (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3) (plus:V2DF (match_dup 1) (match_dup 2)))
+   (set (match_operand:DF 0 "register_operand")
+       (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_plus_scal_v4sf"
+  [(set (match_dup 2)
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 4)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3) (plus:V4SF (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+       (unspec:V4SF [(match_dup 3) (match_dup 3) (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5) (plus:V4SF (match_dup 3) (match_dup 4)))
+   (set (match_operand:SF 0 "register_operand")
+       (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_fmin, reduc_fmax, reduc_smin, reduc_smax
+
+(define_int_iterator REDUC_FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_attr reduc_fminmax_name [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
+(define_code_iterator REDUC_MINMAX [smin smax])
+(define_code_attr reduc_minmax_name [(smin "smin") (smax "smax")])
+
+(define_expand "reduc_<reduc_fminmax_name>_scal_v2df"
+  [(set (match_dup 2)
+       (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+       (unspec:V2DF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+   (set (match_operand:DF 0 "register_operand" "")
+       (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_<reduc_fminmax_name>_scal_v4sf"
+  [(set (match_dup 2)
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 4)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+       (unspec:V4SF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+   (set (match_dup 4)
+       (unspec:V4SF [(match_dup 3)
+                     (match_dup 3)
+                     (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+       (unspec:V4SF [(match_dup 3) (match_dup 4) (const_int 4)] REDUC_FMINMAX))
+   (set (match_operand:SF 0 "register_operand")
+       (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+   "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "reduc_<reduc_minmax_name>_scal_v2df"
+  [(set (match_dup 2)
+       (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+       (REDUC_MINMAX:V2DF (match_dup 1) (match_dup 2)))
+   (set (match_operand:DF 0 "register_operand" "")
+       (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_<reduc_minmax_name>_scal_v4sf"
+  [(set (match_dup 2)
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 4)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+       (REDUC_MINMAX:V4SF (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+       (unspec:V4SF [(match_dup 3)
+                     (match_dup 3)
+                     (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+       (REDUC_MINMAX:V4SF (match_dup 3) (match_dup 4)))
+   (set (match_operand:SF 0 "register_operand" "")
+       (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+   "TARGET_VXE"
+{
+  operands[2] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (V4SFmode);
+  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_and, reduc_ior, reduc_xor
+; reduc_smin, reduc_smax, reduc_umin, reduc_umax
+
+(define_code_iterator REDUCBIN [and xor ior smin smax umin umax])
+(define_code_attr reduc_bin_insn [(and "and") (xor "xor") (ior "ior")
+                                 (smin "smin") (smax "smax")
+                                 (umin "umin") (umax "umax")])
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v2di"
+  [(set (match_dup 2)
+       (unspec:V2DI [(match_operand:V2DI 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+       (REDUCBIN:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_operand:DI 0 "register_operand" "")
+       (vec_select:DI (match_dup 3) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V2DImode);
+  operands[3] = gen_reg_rtx (V2DImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v4si"
+  [(set (match_dup 2)
+       (unspec:V4SI [(match_operand:V4SI 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 4)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+       (REDUCBIN:V4SI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+       (unspec:V4SI [(match_dup 3)
+                     (match_dup 3)
+                     (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+       (REDUCBIN:V4SI (match_dup 3) (match_dup 4)))
+   (set (match_operand:SI 0 "register_operand" "")
+       (vec_select:SI (match_dup 5) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V4SImode);
+  operands[3] = gen_reg_rtx (V4SImode);
+  operands[4] = gen_reg_rtx (V4SImode);
+  operands[5] = gen_reg_rtx (V4SImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v8hi"
+  [(set (match_dup 2)
+       (unspec:V8HI [(match_operand:V8HI 1 "register_operand")
+                     (match_dup 1)
+                     (const_int 2)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+       (REDUCBIN:V8HI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+       (unspec:V8HI [(match_dup 3)
+                     (match_dup 3)
+                     (const_int 4)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+       (REDUCBIN:V8HI (match_dup 3) (match_dup 4)))
+   (set (match_dup 6)
+       (unspec:V8HI [(match_dup 5)
+                     (match_dup 5)
+                     (const_int 8)]
+                    UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 7)
+       (REDUCBIN:V8HI (match_dup 5) (match_dup 6)))
+   (set (match_operand:HI 0 "register_operand" "")
+       (vec_select:HI (match_dup 7) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V8HImode);
+  operands[3] = gen_reg_rtx (V8HImode);
+  operands[4] = gen_reg_rtx (V8HImode);
+  operands[5] = gen_reg_rtx (V8HImode);
+  operands[6] = gen_reg_rtx (V8HImode);
+  operands[7] = gen_reg_rtx (V8HImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v16qi"
+  [(set (match_dup 2)
+       (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
+                      (match_dup 1)
+                      (const_int 1)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 3)
+       (REDUCBIN:V16QI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4)
+       (unspec:V16QI [(match_dup 3)
+                      (match_dup 3)
+                      (const_int 2)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 5)
+       (REDUCBIN:V16QI (match_dup 3) (match_dup 4)))
+   (set (match_dup 6)
+       (unspec:V16QI [(match_dup 5)
+                      (match_dup 5)
+                      (const_int 4)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 7)
+       (REDUCBIN:V16QI (match_dup 5) (match_dup 6)))
+   (set (match_dup 8)
+       (unspec:V16QI [(match_dup 7)
+                      (match_dup 7)
+                      (const_int 8)]
+                     UNSPEC_VEC_SLDBYTE))
+   (set (match_dup 9)
+       (REDUCBIN:V16QI (match_dup 7) (match_dup 8)))
+   (set (match_operand:QI 0 "register_operand" "")
+       (vec_select:QI (match_dup 9) (parallel [(const_int 0)])))]
+  "TARGET_VX"
+{
+  operands[2] = gen_reg_rtx (V16QImode);
+  operands[3] = gen_reg_rtx (V16QImode);
+  operands[4] = gen_reg_rtx (V16QImode);
+  operands[5] = gen_reg_rtx (V16QImode);
+  operands[6] = gen_reg_rtx (V16QImode);
+  operands[7] = gen_reg_rtx (V16QImode);
+  operands[8] = gen_reg_rtx (V16QImode);
+  operands[9] = gen_reg_rtx (V16QImode);
+})
diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c

new file mode 100644 (file)

index 0000000..efd3294
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define T(X,N)                                  \
+  unsigned X                                    \
+  reduce_and_##X (unsigned X *in)               \
+  {                                             \
+  unsigned X acc = (unsigned X)-1;              \
+  for (int i = 0; i < N; i++)                   \
+    acc &= in[i];                               \
+  return acc;                                   \
+  }                                             \
+  unsigned X                                    \
+  reduce_ior_##X (unsigned X *in)               \
+  {                                             \
+  unsigned X acc = 0;                           \
+  for (int i = 0; i < N; i++)                   \
+    acc |= in[i];                               \
+  return acc;                                   \
+  }                                             \
+  unsigned X                                    \
+  redue_xor_##X (unsigned X *in)                \
+  {                                             \
+  unsigned X acc = 0;                           \
+  for (int i = 0; i < N; i++)                   \
+    acc ^= in[i];                               \
+  return acc;                                   \
+  }
+
+T(char,16)
+
+T(short, 8)
+
+T(int,4)
+
+T(long,4)
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_AND" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_IOR" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_XOR" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c

new file mode 100644 (file)

index 0000000..5295250
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
+/* unsigned integers */
+
+unsigned char
+reduce_umax_char (unsigned char *p)
+{
+  unsigned char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned char
+reduce_umin_char (unsigned char *p)
+{
+  unsigned char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+unsigned short
+reduce_umax_short (unsigned short *p)
+{
+  unsigned short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned short
+reduce_umin_short (unsigned short *p)
+{
+  unsigned short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+unsigned int
+reduce_umax_int (unsigned int* p)
+{
+  unsigned int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned int
+reduce_umin_int (unsigned int* p)
+{
+  unsigned int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN(res, p[i]);
+  return res;
+}
+
+unsigned long
+reduce_umax_long (unsigned long* p)
+{
+  unsigned long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+unsigned long
+reduce_umin_long (unsigned long* p)
+{
+  unsigned long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN(res, p[i]);
+  return res;
+}
+
+/* signed integers */
+
+signed char
+reduce_smax_char (signed char *p)
+{
+  signed char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed char
+reduce_smin_char (signed char *p)
+{
+  signed char res = p[0];
+  for (int i = 0; i < 16; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+signed short
+reduce_smax_short (signed short *p)
+{
+  signed short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed short
+reduce_smin_short (signed short *p)
+{
+  signed short res = p[0];
+  for (int i = 0; i < 8; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+signed int
+reduce_smax_int (signed int* p)
+{
+  signed int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed int
+reduce_smin_int (signed int* p)
+{
+  signed int res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN(res, p[i]);
+  return res;
+}
+
+signed long
+reduce_smax_long (signed long* p)
+{
+  signed long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+signed long
+reduce_smin_long (signed long* p)
+{
+  signed long res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN(res, p[i]);
+  return res;
+}
+
+float
+__attribute__((optimize("Ofast")))
+reduce_smax_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+float
+__attribute__((optimize("Ofast")))
+reduce_smin_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+double
+__attribute__((optimize("Ofast")))
+reduce_smax_double (double* p)
+{
+  double res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MAX (res, p[i]);
+  return res;
+}
+
+double
+__attribute__((optimize("Ofast")))
+reduce_smin_double (double* p)
+{
+  double res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = MIN (res, p[i]);
+  return res;
+}
+
+float
+reduce_fmax_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = __builtin_fmaxf (res, p[i]);
+  return res;
+}
+
+float
+reduce_fmin_float (float* p)
+{
+  float res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = __builtin_fminf (res, p[i]);
+  return res;
+}
+
+double
+reduce_fmax_double (double* p)
+{
+  double res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = __builtin_fmax (res, p[i]);
+  return res;
+}
+
+double
+reduce_fmin_double (double* p)
+{
+  double res = p[0];
+  for (int i = 0; i != 4; i++)
+    res = __builtin_fmin (res, p[i]);
+  return res;
+}
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_MAX" 10 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_MIN" 10 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_FMAX" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_FMIN" 2 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c b/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c

new file mode 100644 (file)

index 0000000..12cdd5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c
@@ -0,0 +1,152 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -ftree-vectorize -fdump-tree-optimized" } */
+/* { dg-do run { target { s390_z14_hw } } } */
+
+/* signed integers */
+
+signed char
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_char (signed char* p)
+{
+  signed char sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+short
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_short (short* p)
+{
+  short sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+int
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_int (int* p)
+{
+  int sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+long
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_long (long* p)
+{
+  long sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+/* unsigned integers */
+
+unsigned char
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_uchar (unsigned char* p)
+{
+  unsigned char sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+unsigned short
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_ushort (unsigned short* p)
+{
+  unsigned short sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+unsigned int
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_uint (unsigned int* p)
+{
+  unsigned int sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+unsigned long
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_ulong (unsigned long* p)
+{
+  unsigned long sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+/* floating point */
+
+float
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_float (float* p)
+{
+  float sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+double
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_double (double* p)
+{
+  double sum = 0;
+  for (int i = 0; i != 16; i++)
+    sum += p[i];
+  return sum;
+}
+
+int
+main()
+{
+  signed char chararr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+  signed short shortarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+  signed int intarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+  signed long longarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+
+  unsigned char uchararr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+  unsigned short ushortarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+  unsigned int uintarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+  unsigned long ulongarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+
+  float floatarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+  double doublearr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+
+  if (reduce_add_char (chararr) != (-136 & 0xff))
+    __builtin_abort();
+  if (reduce_add_short (shortarr) != -136)
+    __builtin_abort();
+  if (reduce_add_int (intarr) != -136)
+    __builtin_abort();
+  if (reduce_add_long (longarr) != -136)
+    __builtin_abort();
+
+  if (reduce_add_uchar (uchararr) != 136)
+    __builtin_abort();
+  if (reduce_add_ushort (ushortarr) != 136)
+    __builtin_abort();
+  if (reduce_add_uint (uintarr) != 136)
+    __builtin_abort();
+  if (reduce_add_ulong (ulongarr) != 136)
+    __builtin_abort();
+
+  if (reduce_add_float (floatarr) != 136)
+    __builtin_abort();
+  if (reduce_add_double (doublearr) != -136)
+    __builtin_abort();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 10 "optimized" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp

index c37a30a32ed49f3c9dd73c54e525a61697c8bea6..4486a6ac99b939e11dde2049032c5e0c6ad150df 100644 (file)
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9961,7 +9961,8 @@ proc check_effective_target_vect_logical_reduc { } {
                    || [istarget amdgcn-*-*]
                    || [check_effective_target_riscv_v]
                    || [check_effective_target_loongarch_sx]
-                  || [check_effective_target_x86]}]
+                  || [check_effective_target_x86]
+                  || [check_effective_target_s390_vx]}]
  }
  
  # Return 1 if the target supports the fold_extract_last optab.
author	Juergen Christ <jchrist@linux.ibm.com>
	Tue, 8 Jul 2025 17:02:42 +0000 (19:02 +0200)
committer	Juergen Christ <jchrist@linux.ibm.com>
	Mon, 14 Jul 2025 15:16:53 +0000 (17:16 +0200)
gcc/config/s390/vector.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/s390/vector/reduc-binops-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/s390/vector/reduc-minmax-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/s390/vector/reduc-plus-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/lib/target-supports.exp		patch \| blob \| blame \| history