From 188acc9e8bacdbba56ed2b32d09f191da759500a Mon Sep 17 00:00:00 2001
From: Juergen Christ
Date: Mon, 23 Jun 2025 12:04:20 +0200
Subject: [PATCH] s390: Add some missing vector patterns.

Some patterns that are detected by the autovectorizer can be supported by
s390.  Add expanders such that autovectorization of these patterns works.

RTL for the builtins used unspec to represent highpart multiplication.
Replace this by the correct RTL to allow further simplification.

gcc/ChangeLog:

	* config/s390/s390.md: Removed unused unspecs.
	* config/s390/vector.md (avg<mode>3_ceil): New expander.
	(uavg<mode>3_ceil): New expander.
	(smul<mode>3_highpart): New expander.
	(umul<mode>3_highpart): New expander.
	* config/s390/vx-builtins.md (vec_umulh<mode>): Remove unspec.
	(vec_smulh<mode>): Remove unspec.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/vector/pattern-avg-1.c: New test.
	* gcc.target/s390/vector/pattern-mulh-1.c: New test.

Signed-off-by: Juergen Christ
---
Reviewer note (this section is not part of the commit message):
This patch text was re-flowed from a copy whose line breaks had been
collapsed and whose angle-bracket tokens had been dropped.  The "<mode>"
substitution in the pattern names and the "<bhfgq>" suffix in the two
output templates were put back; they are needed because every pattern
below is instantiated once per mode of its iterator.  Still to confirm
against the original submission before applying: the exact indentation of
context and removed lines, and the author address missing from the From:
and Signed-off-by: lines.

 gcc/config/s390/s390.md                       |  3 --
 gcc/config/s390/vector.md                     | 26 +++++++++++++++++
 gcc/config/s390/vx-builtins.md                | 14 ++++-----
 .../gcc.target/s390/vector/pattern-avg-1.c    | 26 +++++++++++++++++
 .../gcc.target/s390/vector/pattern-mulh-1.c   | 29 +++++++++++++++++++
 5 files changed, 87 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 97a4bdf96b2..440ce93574f 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -139,9 +139,6 @@
    UNSPEC_LCBB
 
    ; Vector
-   UNSPEC_VEC_SMULT_HI
-   UNSPEC_VEC_UMULT_HI
-   UNSPEC_VEC_SMULT_LO
    UNSPEC_VEC_SMULT_EVEN
    UNSPEC_VEC_UMULT_EVEN
    UNSPEC_VEC_SMULT_ODD
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 7251a76c3ae..7c706ecd89c 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3576,3 +3576,29 @@
 ; vec_unpacks_float_lo
 ; vec_unpacku_float_hi
 ; vec_unpacku_float_lo
+
+(define_expand "avg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T                        0 "register_operand")
+        (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand")
+                               (match_operand:VIT_HW_VXE3_T 2 "register_operand")]
+                              UNSPEC_VEC_AVG))]
+  "TARGET_VX")
+
+(define_expand "uavg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T                        0 "register_operand")
+        (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand")
+                               (match_operand:VIT_HW_VXE3_T 2 "register_operand")]
+                              UNSPEC_VEC_AVGU))]
+  "TARGET_VX")
+
+(define_expand "smul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT                               0 "register_operand")
+        (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
+                                      (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
+  "TARGET_VX")
+
+(define_expand "umul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT                               0 "register_operand")
+        (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
+                                      (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
+  "TARGET_VX")
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index a7bb7ff92f5..9e5d18bcb8f 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -982,20 +982,18 @@
 
 ; vmhb, vmhh, vmhf, vmhg, vmhq
 (define_insn "vec_smulh<mode>"
-  [(set (match_operand:VIT_HW_VXE3_DT                         0 "register_operand" "=v")
-        (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
-                                (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
-                               UNSPEC_VEC_SMULT_HI))]
+  [(set (match_operand:VIT_HW_VXE3_DT                               0 "register_operand" "=v")
+        (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+                                      (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))]
   "TARGET_VX"
   "vmh<bhfgq>\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
 
 ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq
 (define_insn "vec_umulh<mode>"
-  [(set (match_operand:VIT_HW_VXE3_DT                         0 "register_operand" "=v")
-        (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
-                                (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
-                               UNSPEC_VEC_UMULT_HI))]
+  [(set (match_operand:VIT_HW_VXE3_DT                               0 "register_operand" "=v")
+        (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+                                      (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))]
   "TARGET_VX"
   "vmlh<bhfgq>\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
new file mode 100644
index 00000000000..a15301aabe5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N) \
+  void \
+  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a, \
+           signed T1 *__restrict b) \
+  { \
+    for (int i = 0; i < N; ++i) \
+      res[i] = ((signed T2)a[i] + b[i] + 1) >> 1; \
+  } \
+  \
+  void \
+  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \
+            unsigned T1 *__restrict b) \
+  { \
+    for (int i = 0; i < N; ++i) \
+      res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \
+  }
+
+TEST(char,short,16)
+TEST(short,int,8)
+TEST(int,long,4)
+TEST(long,__int128,2)
+
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
new file mode 100644
index 00000000000..cd8e4e7d7a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N,S) \
+  void \
+  mulh##T1 (signed T1 *__restrict res, \
+            signed T1 *__restrict l, \
+            signed T1 *__restrict r) \
+  { \
+    for (int i = 0; i < N; ++i) \
+      res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S); \
+  } \
+  \
+  void \
+  umulh##T1 (unsigned T1 *__restrict res, \
+             unsigned T1 *__restrict l, \
+             unsigned T1 *__restrict r) \
+  { \
+    for (int i = 0; i < N; ++i) \
+      res[i] = (unsigned T1) \
+        (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S); \
+  }
+
+TEST(char,short,16,8)
+TEST(short,int,8,16)
+TEST(int,long,4,32)
+TEST(long,__int128,2,64)
+
+/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */
-- 
2.47.3