match.pd: Allow FNMA fold through conversions

author Abhishek Kaushik <abhishek.kaushik@arm.com>

Thu, 14 May 2026 11:10:35 +0000 (11:10 +0000)

committer Alex Coplan <alex.coplan@arm.com>

Fri, 15 May 2026 09:18:31 +0000 (10:18 +0100)
author Abhishek Kaushik <abhishek.kaushik@arm.com>
Thu, 14 May 2026 11:10:35 +0000 (11:10 +0000)
committer Alex Coplan <alex.coplan@arm.com>
Fri, 15 May 2026 09:18:31 +0000 (10:18 +0100)
diff --git a/gcc/match.pd b/gcc/match.pd

index b037b1a28769632ebb8c28c2dd951744ff977d4f..4ed058f6e18a3a34d96d72c6d1e48d4622a4275f 100644 (file)
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -9927,7 +9927,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (simplify
     (negate (fmas@3 @0 @1 @2))
     (if (!HONOR_SIGN_DEPENDENT_ROUNDING (type) && single_use (@3))
-    (IFN_FNMS @0 @1 @2))))
+    (IFN_FNMS @0 @1 @2)))
+  (simplify
+   (fmas:c (nop_convert (negate @0)) @1 @2)
+   (with { tree t = TREE_TYPE (@0); }
+    (if ((!ANY_INTEGRAL_TYPE_P (type)
+         || TYPE_UNSIGNED (type)
+         || !TYPE_OVERFLOW_SANITIZED (type))
+         && (!ANY_INTEGRAL_TYPE_P (t)
+             || TYPE_UNSIGNED (t)
+             || !TYPE_OVERFLOW_SANITIZED (t)))
+   /* Fold the negate into FNMA only if signed overflow is unobservable
+      both for the result type TYPE and for the inner negate's type T.  */
+     (with { tree utype = unsigned_type_for (type); }
+      (convert (IFN_FNMA (convert:utype @0) (convert:utype @1)
+                        (convert:utype @2))))))))
  
   (simplify
    (IFN_FMS:c (negate @0) @1 @2)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fnma_match.c b/gcc/testsuite/gcc.target/aarch64/sve/fnma_match.c

new file mode 100644 (file)

index 0000000..9b6d6fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fnma_match.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv9-a -msve-vector-bits=256" } */
+
+typedef __attribute__((__vector_size__(sizeof(int)*8))) signed int v8i;
+typedef __attribute__((__vector_size__(sizeof(int)*8))) unsigned int v8u;
+
+void g(v8i *a, v8i *b, v8u *c)
+{
+  *c = *c - (v8u)(*a * *b);  /* signed mul, product cast to unsigned */
+}
+
+v8u g_(v8i a, v8i b, v8u c)
+{
+  return c - (v8u)(a * b);  /* by-value form of g */
+}
+
+void h(v8u *a, v8u *b, v8i *c)
+{
+  *c = *c - (v8i)(*a * *b);  /* unsigned mul, product cast to signed */
+}
+
+v8i h_(v8u a, v8u b, v8i c)
+{
+  return c - (v8i)(a * b);  /* by-value form of h */
+}
+
+void x(v8u *a, v8u *b, v8i *c)
+{
+  *c = *c + ((v8i)(-*a) * (v8i)*b);  /* unsigned neg, both cast to signed */
+}
+
+v8i x_(v8u a, v8u b, v8i c)
+{
+  return c + ((v8i)(-a) * (v8i)b);  /* by-value form of x */
+}
+
+void y(v8u *a, v8i *b,v8i *c)
+{
+  *c = *c + ((v8i)(-*a) * *b);  /* unsigned neg cast to signed, signed b */
+}
+
+v8i y_(v8u a, v8i b, v8i c)
+{
+  return c + ((v8i)(-a) * b);  /* by-value form of y */
+}
+
+void z(v8i *a, v8u *b, v8u *c)
+{
+  *c = *c + ((v8u)(-*a) * *b);  /* signed neg cast to unsigned, unsigned b */
+}
+
+v8u z_(v8i a, v8u b, v8u c)
+{
+  return c + ((v8u)(-a) * b);  /* by-value form of z */
+}
+
+/* { dg-final { scan-assembler-times "\\tmsb\\t" 5 } } */
+/* { dg-final { scan-assembler-times "\\tmls\\t" 5 } } */
+/* { dg-final { scan-assembler-not "\\tneg\\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr123897.c b/gcc/testsuite/gcc.target/aarch64/sve/pr123897.c

index d74efabb7f89320ff2d9a651bdcd20e9be061d38..45bc52522a9bb1b60b3a44bcf8b0104d2ab204e1 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr123897.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr123897.c
@@ -13,4 +13,5 @@ void g(v8i *a,v8i *b,v8u *c)
    *c = *c - (v8u)(*a * *b);
  }
  
-/* { dg-final { scan-tree-dump-times "\.FMA" 2 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "\.FNMA" 1 "widening_mul" } } */
author	Abhishek Kaushik <abhishek.kaushik@arm.com>
	Thu, 14 May 2026 11:10:35 +0000 (11:10 +0000)
committer	Alex Coplan <alex.coplan@arm.com>
	Fri, 15 May 2026 09:18:31 +0000 (10:18 +0100)
gcc/match.pd		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/sve/fnma_match.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/pr123897.c		patch \| blob \| blame \| history