]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
s390: Add some missing vector patterns.
author: Juergen Christ <jchrist@linux.ibm.com>
Mon, 23 Jun 2025 10:04:20 +0000 (12:04 +0200)
committer: Juergen Christ <jchrist@linux.ibm.com>
Mon, 7 Jul 2025 11:10:26 +0000 (13:10 +0200)
Some patterns that are detected by the autovectorizer can be supported by
s390.  Add expanders such that autovectorization of these patterns works.

RTL for the builtins used unspec to represent highpart multiplication.
Replace this by the correct RTL to allow further simplification.

gcc/ChangeLog:

* config/s390/s390.md: Remove unused unspecs.
* config/s390/vector.md (avg<mode>3_ceil): New expander.
(uavg<mode>3_ceil): New expander.
(smul<mode>3_highpart): New expander.
(umul<mode>3_highpart): New expander.
* config/s390/vx-builtins.md (vec_umulh<mode>): Remove unspec.
(vec_smulh<mode>): Remove unspec.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/pattern-avg-1.c: New test.
* gcc.target/s390/vector/pattern-mulh-1.c: New test.

Signed-off-by: Juergen Christ <jchrist@linux.ibm.com>
gcc/config/s390/s390.md
gcc/config/s390/vector.md
gcc/config/s390/vx-builtins.md
gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c [new file with mode: 0644]

index 97a4bdf96b2ddd7b9e7f2b41e022a4616938bbc7..440ce93574f4b934d26e430e651914c6dcd8505d 100644 (file)
    UNSPEC_LCBB
 
    ; Vector
-   UNSPEC_VEC_SMULT_HI
-   UNSPEC_VEC_UMULT_HI
-   UNSPEC_VEC_SMULT_LO
    UNSPEC_VEC_SMULT_EVEN
    UNSPEC_VEC_UMULT_EVEN
    UNSPEC_VEC_SMULT_ODD
index 7251a76c3aeaf581d8ae86ee799d55ce7b2b67ad..7c706ecd89c728bda4241f20e992bea640ea88f2 100644 (file)
 ; vec_unpacks_float_lo
 ; vec_unpacku_float_hi
 ; vec_unpacku_float_lo
+
+(define_expand "avg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T                        0 "register_operand")
+       (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand")
+                              (match_operand:VIT_HW_VXE3_T 2 "register_operand")]
+                             UNSPEC_VEC_AVG))]
+  "TARGET_VX")
+
+(define_expand "uavg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T                        0 "register_operand")
+       (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand")
+                              (match_operand:VIT_HW_VXE3_T 2 "register_operand")]
+                             UNSPEC_VEC_AVGU))]
+  "TARGET_VX")
+
+(define_expand "smul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand")
+       (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
+                                     (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
+  "TARGET_VX")
+
+(define_expand "umul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand")
+       (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
+                                     (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
+  "TARGET_VX")
index a7bb7ff92f5e71cb2d5dc23a32caf7cb485c12a4..9e5d18bcb8f474332cebdbc9f42cee48120c1ea1 100644 (file)
 
 ; vmhb, vmhh, vmhf, vmhg, vmhq
 (define_insn "vec_smulh<mode>"
-  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                       "=v")
-       (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
-                               (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
-                              UNSPEC_VEC_SMULT_HI))]
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                              "=v")
+       (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+                                     (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))]
   "TARGET_VX"
   "vmh<bhfgq>\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
 
 ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq
 (define_insn "vec_umulh<mode>"
-  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                       "=v")
-       (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
-                               (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
-                              UNSPEC_VEC_UMULT_HI))]
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                              "=v")
+       (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+                                     (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))]
   "TARGET_VX"
   "vmlh<bhfgq>\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
new file mode 100644 (file)
index 0000000..a15301a
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N)                                                   \
+  void                                                                  \
+  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a,          \
+           signed T1 *__restrict b)                                     \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = ((signed T2)a[i] + b[i] + 1) >> 1;                       \
+  }                                                                     \
+                                                                        \
+  void                                                                  \
+  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a,     \
+            unsigned T1 *__restrict b)                                  \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1;                     \
+  }
+
+TEST(char,short,16)
+TEST(short,int,8)
+TEST(int,long,4)
+TEST(long,__int128,2)
+
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
new file mode 100644 (file)
index 0000000..cd8e4e7
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize -fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N,S)                                                 \
+  void                                                                  \
+  mulh##T1 (signed T1 *__restrict res,                                  \
+            signed T1 *__restrict l,                                    \
+            signed T1 *__restrict r)                                    \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S);  \
+  }                                                                     \
+                                                                        \
+  void                                                                  \
+  umulh##T1 (unsigned T1 *__restrict res,                               \
+             unsigned T1 *__restrict l,                                 \
+             unsigned T1 *__restrict r)                                 \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = (unsigned T1)                                            \
+        (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S);                 \
+  }
+
+TEST(char,short,16,8)
+TEST(short,int,8,16)
+TEST(int,long,4,32)
+TEST(long,__int128,2,64)
+
+/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */