]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Add pattern match in match.pd for .AVG_CEIL
author liuhongt <hongtao.liu@intel.com>
Wed, 26 Feb 2025 06:48:27 +0000 (22:48 -0800)
committer liuhongt <hongtao.liu@intel.com>
Wed, 21 May 2025 00:33:10 +0000 (17:33 -0700)
1) Optimize (a >> 1) + (b >> 1) + ((a | b) & 1) to .AVG_CEIL (a, b)
2) Optimize (a | b) - ((a ^ b) >> 1) to .AVG_CEIL (a, b)

gcc/ChangeLog:

PR middle-end/118994
* match.pd ((a >> 1) + (b >> 1) + ((a | b) & 1) to
.AVG_CEIL (a, b)): New pattern.
((a | b) - ((a ^ b) >> 1) to .AVG_CEIL (a, b)): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr118994-1.c: New test.
* gcc.target/i386/pr118994-2.c: New test.

gcc/match.pd
gcc/testsuite/gcc.target/i386/pr118994-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr118994-2.c [new file with mode: 0644]

index 3b827df4cd6cc173961d421fc4ecb7525dc20f51..27f662f9714bf272af36e7398e88513d78eb1b01 100644 (file)
@@ -11486,3 +11486,26 @@ and,
       }
       (if (full_perm_p)
        (vec_perm (op@3 @0 @1) @3 @2))))))
+
+#if GIMPLE
+/* Simplify (a >> 1) + (b >> 1) + ((a | b) & 1) to .AVG_CEIL (a, b).
+   Similar for (a | b) - ((a ^ b) >> 1).  */
+/* Guard: vector types, PROP_last_full_fold set, IFN_AVG_CEIL supported.  */
+(simplify
+  (plus:c
+    (plus (rshift @0 integer_onep@1) (rshift @2 @1))
+    (bit_and (bit_ior @0 @2) integer_onep@3))
+  (if (cfun && (cfun->curr_properties & PROP_last_full_fold) != 0
+      && VECTOR_TYPE_P (type)
+      && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, OPTIMIZE_FOR_BOTH))
+      (IFN_AVG_CEIL @0 @2)))
+/* Same guard as above; relies on a + b == 2 * (a | b) - (a ^ b).  */
+(simplify
+  (minus
+    (bit_ior @0 @2)
+    (rshift (bit_xor @0 @2) integer_onep@1))
+  (if (cfun && (cfun->curr_properties & PROP_last_full_fold) != 0
+      && VECTOR_TYPE_P (type)
+      && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, OPTIMIZE_FOR_BOTH))
+      (IFN_AVG_CEIL @0 @2)))
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/pr118994-1.c b/gcc/testsuite/gcc.target/i386/pr118994-1.c
new file mode 100644 (file)
index 0000000..5f40aba
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized"} } */
+/* Six functions below; the dump scan above expects six .AVG_CEIL matches.  */
+#define VecRoundingAvg(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
+/* Unsigned char and unsigned short vectors of 16, 32 and 64 bytes.  */
+typedef unsigned char GccU8x16Vec __attribute__((__vector_size__(16)));
+typedef unsigned short GccU16x8Vec __attribute__((__vector_size__(16)));
+typedef unsigned char GccU8x32Vec __attribute__((__vector_size__(32)));
+typedef unsigned short GccU16x16Vec __attribute__((__vector_size__(32)));
+typedef unsigned char GccU8x64Vec __attribute__((__vector_size__(64)));
+typedef unsigned short GccU16x32Vec __attribute__((__vector_size__(64)));
+/* One wrapper per vector type; each body is just VecRoundingAvg (a, b).  */
+GccU8x16Vec U8x16VecRoundingAvg(GccU8x16Vec a, GccU8x16Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU16x8Vec U16x8VecRoundingAvg(GccU16x8Vec a, GccU16x8Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU8x32Vec U8x32VecRoundingAvg(GccU8x32Vec a, GccU8x32Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU16x16Vec U16x16VecRoundingAvg(GccU16x16Vec a, GccU16x16Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU8x64Vec U8x64VecRoundingAvg(GccU8x64Vec a, GccU8x64Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU16x32Vec U16x32VecRoundingAvg(GccU16x32Vec a, GccU16x32Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/pr118994-2.c b/gcc/testsuite/gcc.target/i386/pr118994-2.c
new file mode 100644 (file)
index 0000000..ba90e0a
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized"} } */
+/* Six functions below; the dump scan above expects six .AVG_CEIL matches.  */
+#define VecRoundingAvg(a, b) ((a | b) - ((a ^ b) >> 1))
+/* Unsigned char and unsigned short vectors of 16, 32 and 64 bytes.  */
+typedef unsigned char GccU8x16Vec __attribute__((__vector_size__(16)));
+typedef unsigned short GccU16x8Vec __attribute__((__vector_size__(16)));
+typedef unsigned char GccU8x32Vec __attribute__((__vector_size__(32)));
+typedef unsigned short GccU16x16Vec __attribute__((__vector_size__(32)));
+typedef unsigned char GccU8x64Vec __attribute__((__vector_size__(64)));
+typedef unsigned short GccU16x32Vec __attribute__((__vector_size__(64)));
+/* One wrapper per vector type; each body is just VecRoundingAvg (a, b).  */
+GccU8x16Vec U8x16VecRoundingAvg(GccU8x16Vec a, GccU8x16Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU16x8Vec U16x8VecRoundingAvg(GccU16x8Vec a, GccU16x8Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU8x32Vec U8x32VecRoundingAvg(GccU8x32Vec a, GccU8x32Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU16x16Vec U16x16VecRoundingAvg(GccU16x16Vec a, GccU16x16Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU8x64Vec U8x64VecRoundingAvg(GccU8x64Vec a, GccU8x64Vec b) {
+  return VecRoundingAvg(a, b);
+}
+
+GccU16x32Vec U16x32VecRoundingAvg(GccU16x32Vec a, GccU16x32Vec b) {
+  return VecRoundingAvg(a, b);
+}
+