Implementation and tests for the standard reduction optabs.
Signed-off-by: Juergen Christ <jchrist@linux.ibm.com>
gcc/ChangeLog:
* config/s390/vector.md (reduc_plus_scal_<mode>): Implement.
(reduc_plus_scal_v2df): Implement.
(reduc_plus_scal_v4sf): Implement.
(REDUC_FMINMAX): New int iterator.
(reduc_fminmax_name): New int attribute.
(REDUC_MINMAX): New code iterator.
(reduc_minmax_name): New code attribute.
(reduc_<reduc_fminmax_name>_scal_v2df): Implement.
(reduc_<reduc_fminmax_name>_scal_v4sf): Implement.
(reduc_<reduc_minmax_name>_scal_v2df): Implement.
(reduc_<reduc_minmax_name>_scal_v4sf): Implement.
(REDUCBIN): New code iterator.
(reduc_bin_insn): New code attribute.
(reduc_<reduc_bin_insn>_scal_v2di): Implement.
(reduc_<reduc_bin_insn>_scal_v4si): Implement.
(reduc_<reduc_bin_insn>_scal_v8hi): Implement.
(reduc_<reduc_bin_insn>_scal_v16qi): Implement.
gcc/testsuite/ChangeLog:
* lib/target-supports.exp: Add s390 to vect_logical_reduc targets.
* gcc.target/s390/vector/reduc-binops-1.c: New test.
* gcc.target/s390/vector/reduc-minmax-1.c: New test.
* gcc.target/s390/vector/reduc-plus-1.c: New test.
"veval\t%v0,%v1,%v2,%v3,%b4"
[(set_attr "op_type" "VRI")])
-; reduc_smin
-; reduc_smax
-; reduc_umin
-; reduc_umax
-
; vec_pack_sfix_trunc: convert + pack ?
; vec_pack_ufix_trunc
; vec_unpacks_float_hi
(const_int 4)]
UNSPEC_FMIN))]
"TARGET_VXE")
+
+; reduc_plus
+; Expander for reduc_plus_scal on the VI_HW_QH modes.  Emits three steps:
+;   op4 (V4SI) = UNSPEC_VEC_VSUM of the input (op1) and a zero vector (op2)
+;   op5 (V2DI) = UNSPEC_VEC_VSUMQ of op4 and the zero vector viewed as V4SI (op3)
+;   op0        = one element of op5 viewed in the input mode (op6), at index op7
+(define_expand "reduc_plus_scal_<mode>"
+ [(set (match_dup 4)
+ (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand")
+ (match_dup 2)]
+ UNSPEC_VEC_VSUM))
+ (set (match_dup 5)
+ (unspec:V2DI [(match_dup 4) (match_dup 3)] UNSPEC_VEC_VSUMQ))
+ (set (match_operand:<non_vec> 0 "register_operand")
+ (vec_select:<non_vec> (match_dup 6)
+ (parallel [(match_dup 7)])))]
+ "TARGET_VX"
+{
+ /* All-zero vector in the input mode, plus a V4SI view of the same register.  */
+ operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+ operands[3] = simplify_gen_subreg (V4SImode, operands[2], <MODE>mode, 0);
+ /* Temporaries holding the two intermediate results.  */
+ operands[4] = gen_reg_rtx (V4SImode);
+ operands[5] = gen_reg_rtx (V2DImode);
+ /* View the V2DI result in the input mode so a scalar element can be selected.  */
+ operands[6] = simplify_gen_subreg (<MODE>mode, operands[5], V2DImode, 0);
+ /* Index of the last element: 16 bytes divided by the element size, minus one.  */
+ operands[7] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+; Expander for reduc_plus_scal on the VI_HW_SD modes.  Emits two steps:
+;   op3 (V2DI) = UNSPEC_VEC_VSUMQ of the input (op1) and a zero vector (op2)
+;   op0        = one element of op3 viewed in the input mode (op4), at index op5
+(define_expand "reduc_plus_scal_<mode>"
+ [(set (match_dup 3)
+ (unspec:V2DI [(match_operand:VI_HW_SD 1 "register_operand")
+ (match_dup 2)]
+ UNSPEC_VEC_VSUMQ))
+ (set (match_operand:<non_vec> 0 "register_operand")
+ (vec_select:<non_vec> (match_dup 4)
+ (parallel [(match_dup 5)])))]
+ "TARGET_VX"
+{
+ /* All-zero vector in the input mode.  */
+ operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+ /* Temporary for the V2DI result and a view of it in the input mode.  */
+ operands[3] = gen_reg_rtx (V2DImode);
+ operands[4] = simplify_gen_subreg (<MODE>mode, operands[3], V2DImode, 0);
+ /* Index of the last element: 16 bytes divided by the element size, minus one.  */
+ operands[5] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+; Expander for reduc_plus_scal on V2DF.  Emits:
+;   op2 = UNSPEC_VEC_SLDBYTE of the input with itself and byte count 8
+;         (presumably swaps the two doubles -- confirm against the insn pattern)
+;   op3 = op1 + op2
+;   op0 = element 0 of op3
+(define_expand "reduc_plus_scal_v2df"
+ [(set (match_dup 2)
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+ (match_dup 1)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3) (plus:V2DF (match_dup 1) (match_dup 2)))
+ (set (match_operand:DF 0 "register_operand")
+ (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ /* Fresh temporaries for the shifted copy and the sum.  */
+ operands[2] = gen_reg_rtx (V2DFmode);
+ operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+; Expander for reduc_plus_scal on V4SF; only enabled for TARGET_VXE.
+; Two combine steps, each adding a vector to an UNSPEC_VEC_SLDBYTE copy of
+; itself (byte counts 4, then 8), followed by extraction of element 0:
+;   op2 = SLDBYTE (op1, op1, 4);  op3 = op1 + op2
+;   op4 = SLDBYTE (op3, op3, 8);  op5 = op3 + op4
+;   op0 = element 0 of op5
+(define_expand "reduc_plus_scal_v4sf"
+ [(set (match_dup 2)
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_dup 1)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3) (plus:V4SF (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V4SF [(match_dup 3) (match_dup 3) (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5) (plus:V4SF (match_dup 3) (match_dup 4)))
+ (set (match_operand:SF 0 "register_operand")
+ (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+ "TARGET_VXE"
+{
+ /* Fresh temporaries for the two shifted copies and the two partial sums.  */
+ operands[2] = gen_reg_rtx (V4SFmode);
+ operands[3] = gen_reg_rtx (V4SFmode);
+ operands[4] = gen_reg_rtx (V4SFmode);
+ operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_fmin, reduc_fmax, reduc_smin, reduc_smax
+
+; Int iterator over the two floating-point min/max unspecs; the attribute
+; supplies the "fmax"/"fmin" part of the expander name.
+(define_int_iterator REDUC_FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_attr reduc_fminmax_name [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
+; Code iterator over the smin/smax rtx codes used by the floating-point
+; reduc_smin/reduc_smax expanders; the attribute supplies the name part.
+(define_code_iterator REDUC_MINMAX [smin smax])
+(define_code_attr reduc_minmax_name [(smin "smin") (smax "smax")])
+
+; Expander for reduc_fmax_scal / reduc_fmin_scal on V2DF.  Emits:
+;   op2 = UNSPEC_VEC_SLDBYTE of the input with itself and byte count 8
+;   op3 = UNSPEC_FMAX or UNSPEC_FMIN of op1 and op2, with third operand
+;         (const_int 4), the same constant the fmin pattern above uses
+;   op0 = element 0 of op3
+; NOTE(review): the fmin expander directly above is conditioned on TARGET_VXE
+; while this one uses TARGET_VX -- confirm that the UNSPEC_FMAX/UNSPEC_FMIN
+; insns are available for V2DF without VXE.
+(define_expand "reduc_<reduc_fminmax_name>_scal_v2df"
+ [(set (match_dup 2)
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+ (match_dup 1)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (unspec:V2DF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+ (set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ /* Fresh temporaries for the shifted copy and the combined result.  */
+ operands[2] = gen_reg_rtx (V2DFmode);
+ operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+; Expander for reduc_fmax_scal / reduc_fmin_scal on V4SF; only enabled for
+; TARGET_VXE.  Two combine steps, each applying UNSPEC_FMAX/UNSPEC_FMIN (with
+; third operand (const_int 4)) to a vector and an UNSPEC_VEC_SLDBYTE copy of
+; itself (byte counts 4, then 8), followed by extraction of element 0.
+(define_expand "reduc_<reduc_fminmax_name>_scal_v4sf"
+ [(set (match_dup 2)
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_dup 1)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (unspec:V4SF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+ (set (match_dup 4)
+ (unspec:V4SF [(match_dup 3)
+ (match_dup 3)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (unspec:V4SF [(match_dup 3) (match_dup 4) (const_int 4)] REDUC_FMINMAX))
+ (set (match_operand:SF 0 "register_operand")
+ (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+ "TARGET_VXE"
+{
+ /* Fresh temporaries for the shifted copies and the partial results.  */
+ operands[2] = gen_reg_rtx (V4SFmode);
+ operands[3] = gen_reg_rtx (V4SFmode);
+ operands[4] = gen_reg_rtx (V4SFmode);
+ operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; Expander for reduc_smin_scal / reduc_smax_scal on V2DF.  Emits:
+;   op2 = UNSPEC_VEC_SLDBYTE of the input with itself and byte count 8
+;   op3 = smin or smax of op1 and op2
+;   op0 = element 0 of op3
+(define_expand "reduc_<reduc_minmax_name>_scal_v2df"
+ [(set (match_dup 2)
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+ (match_dup 1)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUC_MINMAX:V2DF (match_dup 1) (match_dup 2)))
+ (set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ /* Fresh temporaries for the shifted copy and the combined result.  */
+ operands[2] = gen_reg_rtx (V2DFmode);
+ operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+; Expander for reduc_smin_scal / reduc_smax_scal on V4SF; only enabled for
+; TARGET_VXE.  Two combine steps, each applying smin/smax to a vector and an
+; UNSPEC_VEC_SLDBYTE copy of itself (byte counts 4, then 8), followed by
+; extraction of element 0.
+(define_expand "reduc_<reduc_minmax_name>_scal_v4sf"
+ [(set (match_dup 2)
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_dup 1)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUC_MINMAX:V4SF (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V4SF [(match_dup 3)
+ (match_dup 3)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (REDUC_MINMAX:V4SF (match_dup 3) (match_dup 4)))
+ (set (match_operand:SF 0 "register_operand" "")
+ (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+ "TARGET_VXE"
+{
+ /* Fresh temporaries for the shifted copies and the partial results.  */
+ operands[2] = gen_reg_rtx (V4SFmode);
+ operands[3] = gen_reg_rtx (V4SFmode);
+ operands[4] = gen_reg_rtx (V4SFmode);
+ operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_and, reduc_ior, reduc_xor
+; reduc_smin, reduc_smax, reduc_umin, reduc_umax
+
+; Code iterator over the binary rtx codes that share the integer reduction
+; expanders below; the attribute supplies the matching part of the name.
+(define_code_iterator REDUCBIN [and xor ior smin smax umin umax])
+(define_code_attr reduc_bin_insn [(and "and") (xor "xor") (ior "ior")
+ (smin "smin") (smax "smax")
+ (umin "umin") (umax "umax")])
+
+; Expander for the REDUCBIN reductions on V2DI.  Emits:
+;   op2 = UNSPEC_VEC_SLDBYTE of the input with itself and byte count 8
+;   op3 = REDUCBIN of op1 and op2
+;   op0 = element 0 of op3
+(define_expand "reduc_<reduc_bin_insn>_scal_v2di"
+ [(set (match_dup 2)
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand")
+ (match_dup 1)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUCBIN:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (vec_select:DI (match_dup 3) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ /* Fresh temporaries for the shifted copy and the combined result.  */
+ operands[2] = gen_reg_rtx (V2DImode);
+ operands[3] = gen_reg_rtx (V2DImode);
+})
+
+; Expander for the REDUCBIN reductions on V4SI.  Two combine steps, each
+; applying the operation to a vector and an UNSPEC_VEC_SLDBYTE copy of itself
+; (byte counts 4, then 8), followed by extraction of element 0.
+(define_expand "reduc_<reduc_bin_insn>_scal_v4si"
+ [(set (match_dup 2)
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand")
+ (match_dup 1)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUCBIN:V4SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V4SI [(match_dup 3)
+ (match_dup 3)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (REDUCBIN:V4SI (match_dup 3) (match_dup 4)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (vec_select:SI (match_dup 5) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ /* Fresh temporaries for the shifted copies and the partial results.  */
+ operands[2] = gen_reg_rtx (V4SImode);
+ operands[3] = gen_reg_rtx (V4SImode);
+ operands[4] = gen_reg_rtx (V4SImode);
+ operands[5] = gen_reg_rtx (V4SImode);
+})
+
+; Expander for the REDUCBIN reductions on V8HI.  Three combine steps, each
+; applying the operation to a vector and an UNSPEC_VEC_SLDBYTE copy of itself
+; (byte counts 2, 4, then 8), followed by extraction of element 0.
+(define_expand "reduc_<reduc_bin_insn>_scal_v8hi"
+ [(set (match_dup 2)
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand")
+ (match_dup 1)
+ (const_int 2)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUCBIN:V8HI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V8HI [(match_dup 3)
+ (match_dup 3)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (REDUCBIN:V8HI (match_dup 3) (match_dup 4)))
+ (set (match_dup 6)
+ (unspec:V8HI [(match_dup 5)
+ (match_dup 5)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 7)
+ (REDUCBIN:V8HI (match_dup 5) (match_dup 6)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (vec_select:HI (match_dup 7) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ /* Fresh temporaries for the shifted copies and the partial results.  */
+ operands[2] = gen_reg_rtx (V8HImode);
+ operands[3] = gen_reg_rtx (V8HImode);
+ operands[4] = gen_reg_rtx (V8HImode);
+ operands[5] = gen_reg_rtx (V8HImode);
+ operands[6] = gen_reg_rtx (V8HImode);
+ operands[7] = gen_reg_rtx (V8HImode);
+})
+
+; Expander for the REDUCBIN reductions on V16QI.  Four combine steps, each
+; applying the operation to a vector and an UNSPEC_VEC_SLDBYTE copy of itself
+; (byte counts 1, 2, 4, then 8), followed by extraction of element 0.
+(define_expand "reduc_<reduc_bin_insn>_scal_v16qi"
+ [(set (match_dup 2)
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
+ (match_dup 1)
+ (const_int 1)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUCBIN:V16QI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V16QI [(match_dup 3)
+ (match_dup 3)
+ (const_int 2)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (REDUCBIN:V16QI (match_dup 3) (match_dup 4)))
+ (set (match_dup 6)
+ (unspec:V16QI [(match_dup 5)
+ (match_dup 5)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 7)
+ (REDUCBIN:V16QI (match_dup 5) (match_dup 6)))
+ (set (match_dup 8)
+ (unspec:V16QI [(match_dup 7)
+ (match_dup 7)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 9)
+ (REDUCBIN:V16QI (match_dup 7) (match_dup 8)))
+ (set (match_operand:QI 0 "register_operand" "")
+ (vec_select:QI (match_dup 9) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ /* Fresh temporaries for the shifted copies and the partial results.  */
+ operands[2] = gen_reg_rtx (V16QImode);
+ operands[3] = gen_reg_rtx (V16QImode);
+ operands[4] = gen_reg_rtx (V16QImode);
+ operands[5] = gen_reg_rtx (V16QImode);
+ operands[6] = gen_reg_rtx (V16QImode);
+ operands[7] = gen_reg_rtx (V16QImode);
+ operands[8] = gen_reg_rtx (V16QImode);
+ operands[9] = gen_reg_rtx (V16QImode);
+})
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13 -ftree-vectorize -fdump-tree-optimized" } */
+
+/* Define reduce_and_X, reduce_ior_X and reduce_xor_X, each folding the
+   first N elements of an "unsigned X" array with &, | and ^ respectively.
+   The AND accumulator starts with all bits set, the others with zero.  */
+#define T(X,N) \
+ unsigned X \
+ reduce_and_##X (unsigned X *in) \
+ { \
+ unsigned X acc = (unsigned X)-1; \
+ for (int i = 0; i < N; i++) \
+ acc &= in[i]; \
+ return acc; \
+ } \
+ unsigned X \
+ reduce_ior_##X (unsigned X *in) \
+ { \
+ unsigned X acc = 0; \
+ for (int i = 0; i < N; i++) \
+ acc |= in[i]; \
+ return acc; \
+ } \
+ unsigned X \
+ reduce_xor_##X (unsigned X *in) \
+ { \
+ unsigned X acc = 0; \
+ for (int i = 0; i < N; i++) \
+ acc ^= in[i]; \
+ return acc; \
+ }
+
+T(char,16)
+
+T(short, 8)
+
+T(int,4)
+
+T(long,4)
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_AND" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_IOR" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_XOR" 4 "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -ftree-vectorize -fdump-tree-optimized" } */
+
+/* Larger / smaller of two values; each argument may be evaluated twice.  */
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
+/* unsigned integers */
+
+/* Return the largest of the 16 unsigned chars at P.  */
+unsigned char
+reduce_umax_char (unsigned char *p)
+{
+ unsigned char res = p[0];
+ for (int i = 0; i < 16; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 16 unsigned chars at P.  */
+unsigned char
+reduce_umin_char (unsigned char *p)
+{
+ unsigned char res = p[0];
+ for (int i = 0; i < 16; i++)
+ res = MIN (res, p[i]);
+ return res;
+}
+
+/* Return the largest of the 8 unsigned shorts at P.  */
+unsigned short
+reduce_umax_short (unsigned short *p)
+{
+ unsigned short res = p[0];
+ for (int i = 0; i < 8; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 8 unsigned shorts at P.  */
+unsigned short
+reduce_umin_short (unsigned short *p)
+{
+ unsigned short res = p[0];
+ for (int i = 0; i < 8; i++)
+ res = MIN (res, p[i]);
+ return res;
+}
+
+/* Return the largest of the 4 unsigned ints at P.  */
+unsigned int
+reduce_umax_int (unsigned int* p)
+{
+ unsigned int res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 4 unsigned ints at P.  */
+unsigned int
+reduce_umin_int (unsigned int* p)
+{
+ unsigned int res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MIN(res, p[i]);
+ return res;
+}
+
+/* Return the largest of the 4 unsigned longs at P.  */
+unsigned long
+reduce_umax_long (unsigned long* p)
+{
+ unsigned long res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 4 unsigned longs at P.  */
+unsigned long
+reduce_umin_long (unsigned long* p)
+{
+ unsigned long res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MIN(res, p[i]);
+ return res;
+}
+
+/* signed integers */
+
+/* Return the largest of the 16 signed chars at P.  */
+signed char
+reduce_smax_char (signed char *p)
+{
+ signed char res = p[0];
+ for (int i = 0; i < 16; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 16 signed chars at P.  */
+signed char
+reduce_smin_char (signed char *p)
+{
+ signed char res = p[0];
+ for (int i = 0; i < 16; i++)
+ res = MIN (res, p[i]);
+ return res;
+}
+
+/* Return the largest of the 8 signed shorts at P.  */
+signed short
+reduce_smax_short (signed short *p)
+{
+ signed short res = p[0];
+ for (int i = 0; i < 8; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 8 signed shorts at P.  */
+signed short
+reduce_smin_short (signed short *p)
+{
+ signed short res = p[0];
+ for (int i = 0; i < 8; i++)
+ res = MIN (res, p[i]);
+ return res;
+}
+
+/* Return the largest of the 4 signed ints at P.  */
+signed int
+reduce_smax_int (signed int* p)
+{
+ signed int res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 4 signed ints at P.  */
+signed int
+reduce_smin_int (signed int* p)
+{
+ signed int res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MIN(res, p[i]);
+ return res;
+}
+
+/* Return the largest of the 4 signed longs at P.  */
+signed long
+reduce_smax_long (signed long* p)
+{
+ signed long res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 4 signed longs at P.  */
+signed long
+reduce_smin_long (signed long* p)
+{
+ signed long res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MIN(res, p[i]);
+ return res;
+}
+
+/* Return the largest of the 4 floats at P, using the MAX macro; compiled
+   with the "Ofast" optimize attribute.  */
+float
+__attribute__((optimize("Ofast")))
+reduce_smax_float (float* p)
+{
+ float res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 4 floats at P, using the MIN macro; compiled
+   with the "Ofast" optimize attribute.  */
+float
+__attribute__((optimize("Ofast")))
+reduce_smin_float (float* p)
+{
+ float res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MIN (res, p[i]);
+ return res;
+}
+
+/* Return the largest of the 4 doubles at P, using the MAX macro; compiled
+   with the "Ofast" optimize attribute.  */
+double
+__attribute__((optimize("Ofast")))
+reduce_smax_double (double* p)
+{
+ double res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MAX (res, p[i]);
+ return res;
+}
+
+/* Return the smallest of the 4 doubles at P, using the MIN macro; compiled
+   with the "Ofast" optimize attribute.  */
+double
+__attribute__((optimize("Ofast")))
+reduce_smin_double (double* p)
+{
+ double res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = MIN (res, p[i]);
+ return res;
+}
+
+/* Fold the 4 floats at P with __builtin_fmaxf and return the result.  */
+float
+reduce_fmax_float (float* p)
+{
+ float res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = __builtin_fmaxf (res, p[i]);
+ return res;
+}
+
+/* Fold the 4 floats at P with __builtin_fminf and return the result.  */
+float
+reduce_fmin_float (float* p)
+{
+ float res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = __builtin_fminf (res, p[i]);
+ return res;
+}
+
+/* Fold the 4 doubles at P with __builtin_fmax and return the result.  */
+double
+reduce_fmax_double (double* p)
+{
+ double res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = __builtin_fmax (res, p[i]);
+ return res;
+}
+
+/* Fold the 4 doubles at P with __builtin_fmin and return the result.  */
+double
+reduce_fmin_double (double* p)
+{
+ double res = p[0];
+ for (int i = 0; i != 4; i++)
+ res = __builtin_fmin (res, p[i]);
+ return res;
+}
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_MAX" 10 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_MIN" 10 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_FMAX" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.REDUC_FMIN" 2 "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -ftree-vectorize -fdump-tree-optimized" } */
+/* { dg-do run { target { s390_z14_hw } } } */
+
+/* signed integers */
+
+/* Return the sum of the 16 signed chars at P, accumulated in a signed char.
+   noipa keeps the function out of interprocedural optimization.  */
+signed char
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_char (signed char* p)
+{
+ signed char sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* Return the sum of the 16 shorts at P, accumulated in a short.  */
+short
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_short (short* p)
+{
+ short sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* Return the sum of the 16 ints at P.  */
+int
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_int (int* p)
+{
+ int sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* Return the sum of the 16 longs at P.  */
+long
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_long (long* p)
+{
+ long sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* unsigned integers */
+
+/* Return the sum of the 16 unsigned chars at P, accumulated in an
+   unsigned char.  */
+unsigned char
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_uchar (unsigned char* p)
+{
+ unsigned char sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* Return the sum of the 16 unsigned shorts at P, accumulated in an
+   unsigned short.  */
+unsigned short
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_ushort (unsigned short* p)
+{
+ unsigned short sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* Return the sum of the 16 unsigned ints at P.  */
+unsigned int
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_uint (unsigned int* p)
+{
+ unsigned int sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* Return the sum of the 16 unsigned longs at P.  */
+unsigned long
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_ulong (unsigned long* p)
+{
+ unsigned long sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* floating point */
+
+/* Return the sum of the 16 floats at P; compiled with the "Ofast" optimize
+   attribute.  */
+float
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_float (float* p)
+{
+ float sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* Return the sum of the 16 doubles at P; compiled with the "Ofast" optimize
+   attribute.  */
+double
+__attribute__((noipa, optimize("Ofast")))
+reduce_add_double (double* p)
+{
+ double sum = 0;
+ for (int i = 0; i != 16; i++)
+ sum += p[i];
+ return sum;
+}
+
+/* Run every add reduction on a 16-element array holding 1..16 or -1..-16
+   and abort if any result differs from the expected sum (136 or -136).  */
+int
+main()
+{
+ signed char chararr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+ signed short shortarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+ signed int intarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+ signed long longarr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+
+ unsigned char uchararr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ unsigned short ushortarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ unsigned int uintarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ unsigned long ulongarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+
+ float floatarr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
+ double doublearr[] = {-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16};
+
+ /* The comparison value for the signed char case is the low 8 bits of
+    -136, i.e. 120.  */
+ if (reduce_add_char (chararr) != (-136 & 0xff))
+ __builtin_abort();
+ if (reduce_add_short (shortarr) != -136)
+ __builtin_abort();
+ if (reduce_add_int (intarr) != -136)
+ __builtin_abort();
+ if (reduce_add_long (longarr) != -136)
+ __builtin_abort();
+
+ if (reduce_add_uchar (uchararr) != 136)
+ __builtin_abort();
+ if (reduce_add_ushort (ushortarr) != 136)
+ __builtin_abort();
+ if (reduce_add_uint (uintarr) != 136)
+ __builtin_abort();
+ if (reduce_add_ulong (ulongarr) != 136)
+ __builtin_abort();
+
+ if (reduce_add_float (floatarr) != 136)
+ __builtin_abort();
+ if (reduce_add_double (doublearr) != -136)
+ __builtin_abort();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 10 "optimized" } } */
|| [istarget amdgcn-*-*]
|| [check_effective_target_riscv_v]
|| [check_effective_target_loongarch_sx]
- || [check_effective_target_x86]}]
+ || [check_effective_target_x86]
+ || [check_effective_target_s390_vx]}]
}
# Return 1 if the target supports the fold_extract_last optab.