From: Uros Bizjak <ubizjak@gmail.com>
Date: Thu, 30 Oct 2025 07:59:32 +0000 (+0100)
Subject: i386: Make better use of overflow flags in codegen of min/max(a, add/sub(a, b)) [PR116815]
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c45a8fb11ae0c57bca052765742df6c8ad91f164;p=thirdparty%2Fgcc.git

i386: Make better use of overflow flags in codegen of min/max(a, add/sub(a, b)) [PR116815]

Fold the following patterns:
- umax (a, add (a, b)) -> [sum,  ovf] = add (a, b); ovf ? a : sum
- umin (a, add (a, b)) -> [sum,  ovf] = add (a, b); ovf ? sum : a
- umax (a, sub (a, b)) -> [diff, udf] = sub (a, b); udf ? diff : a
- umin (a, sub (a, b)) -> [diff, udf] = sub (a, b); udf ? a : diff

Here ovf/udf is the carry flag, which signals unsigned overflow in the
case of add and unsigned underflow (borrow) in the case of sub.

Co-developed-by: Dhruv Chawla <dhruvc@nvidia.com>

	PR target/116815

gcc/ChangeLog:

	* config/i386/i386.md (ovf_add_cmp): New code attribute.
	(udf_sub_cmp): Ditto.
	(ovf_comm): New int iterator.
	(*plus_within_<code><mode>3_<ovf_comm>): New insn and split pattern.
	(*minus_within_<code><mode>3): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr116815.c: New test.
	* gcc.target/i386/pr116815.c: New test.
---

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 218377a1770..b812d8b3823 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -27353,6 +27353,72 @@
 	(match_dup 0))]
   "peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (match_dup 1))])
+
+;; umax (a, add (a, b)) => [sum, ovf] = add (a, b); ovf ? a : sum
+;; umin (a, add (a, b)) => [sum, ovf] = add (a, b); ovf ? sum : a
+;; ovf_add_cmp: the flags comparison under which the split keeps the sum.
+(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")])
+;; ovf_comm: which plus operand (1 or 2) is also the other umaxmin operand.
+(define_int_iterator ovf_comm [1 2])
+
+(define_insn_and_split "*plus_within_<code><mode>3_<ovf_comm>"
+  [(set (match_operand:SWI248 0 "register_operand")
+	(umaxmin:SWI248
+	  (plus:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
+		       (match_operand:SWI248 2 "<general_operand>"))
+	  (match_dup ovf_comm)))	; Must equal plus operand 1 or 2.
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_CMOVE
+   && ix86_pre_reload_split ()"
+  "#"	; No assembly of its own; always split.
+  "&& 1"
+  [(parallel	; Add that also sets the CCC flags.
+     [(set (reg:CCC FLAGS_REG)
+	   (compare:CCC
+	     (plus:SWI248 (match_dup 1) (match_dup 2))
+	     (match_dup ovf_comm)))
+      (set (match_dup 3)
+	   (plus:SWI248 (match_dup 1) (match_dup 2)))])
+   (set (match_dup 0)	; Select on those flags.
+	(if_then_else:SWI248
+	  (<ovf_add_cmp> (reg:CCC FLAGS_REG) (const_int 0))
+	  (match_dup 3)	; Condition holds: the sum.
+	  (match_dup ovf_comm)))]	; Otherwise: the shared operand.
+{
+  operands[<ovf_comm>] = force_reg (<MODE>mode, operands[<ovf_comm>]);
+  operands[3] = gen_reg_rtx (<MODE>mode);	/* Holds the sum.  */
+})
+
+;; umax (a, sub (a, b)) => [diff, udf] = sub (a, b); udf ? diff : a
+;; umin (a, sub (a, b)) => [diff, udf] = sub (a, b); udf ? a : diff
+;; udf_sub_cmp: the flags comparison under which the split keeps the diff.
+(define_code_attr udf_sub_cmp [(umax "ltu") (umin "geu")])
+
+(define_insn_and_split "*minus_within_<code><mode>3"
+  [(set (match_operand:SWI248 0 "register_operand")
+	(umaxmin:SWI248
+	  (minus:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
+			(match_operand:SWI248 2 "<general_operand>"))
+	  (match_dup 1)))	; Must equal the minuend.
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_CMOVE
+   && ix86_pre_reload_split ()"
+  "#"	; No assembly of its own; always split.
+  "&& 1"
+  [(parallel	; Subtract that also sets the flags.
+     [(set (reg:CC FLAGS_REG)
+	   (compare:CC (match_dup 1) (match_dup 2)))
+      (set (match_dup 3)
+	   (minus:SWI248 (match_dup 1) (match_dup 2)))])
+   (set (match_dup 0)	; Select on those flags.
+	(if_then_else:SWI248
+	  (<udf_sub_cmp> (reg:CC FLAGS_REG) (const_int 0))
+	  (match_dup 3)	; Condition holds: the difference.
+	  (match_dup 1)))]	; Otherwise: the minuend.
+{
+  operands[1] = force_reg (<MODE>mode, operands[1]);
+  operands[3] = gen_reg_rtx (<MODE>mode);	/* Holds the difference.  */
+})
 
 ;; Misc patterns (?)
 
diff --git a/gcc/testsuite/gcc.dg/pr116815.c b/gcc/testsuite/gcc.dg/pr116815.c
new file mode 100644
index 00000000000..b5f1330b335
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116815.c
@@ -0,0 +1,57 @@
+/* PR target/116815 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+[[gnu::always_inline]]
+inline unsigned min (unsigned a, unsigned b)
+{
+  return (a < b) ? a : b;	/* Unsigned minimum of A and B.  */
+}
+
+[[gnu::always_inline]]
+inline unsigned max (unsigned a, unsigned b)
+{
+  return (a > b) ? a : b;	/* Unsigned maximum of A and B.  */
+}
+
+[[gnu::noipa]] unsigned
+umaxadd (unsigned a, unsigned b)
+{
+  return max (a + b, a);	/* Larger of A and the wrapping sum A + B.  */
+}
+
+[[gnu::noipa]] unsigned
+umaxsub (unsigned a, unsigned b)
+{
+  return max (a - b, a);	/* Larger of A and the wrapping A - B.  */
+}
+
+[[gnu::noipa]] unsigned
+uminadd (unsigned a, unsigned b)
+{
+  return min (a + b, a);	/* Smaller of A and the wrapping sum A + B.  */
+}
+
+[[gnu::noipa]] unsigned
+uminsub (unsigned a, unsigned b)
+{
+  return min (a - b, a);	/* Smaller of A and the wrapping A - B.  */
+}
+
+int
+main ()
+{
+  /* The sum overflows to 0x30000000, below the first argument.  */
+  if (umaxadd (0x90000000, 0xa0000000) != 0x90000000)
+    __builtin_abort ();
+
+  if (uminadd (0x90000000, 0xa0000000) != 0x30000000)
+    __builtin_abort ();
+
+  /* The difference underflows to 0x60000000, above the first argument.  */
+  if (umaxsub (0x00000000, 0xa0000000) != 0x60000000)
+    __builtin_abort ();
+
+  if (uminsub (0x00000000, 0xa0000000) != 0x00000000)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr116815.c b/gcc/testsuite/gcc.target/i386/pr116815.c
new file mode 100644
index 00000000000..1cd2f72c3b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116815.c
@@ -0,0 +1,31 @@
+/* PR target/116815 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-march=pentiumpro" { target ia32 } } */
+
+static inline __attribute__ ((always_inline))	/* Unsigned maximum.  */
+unsigned max (unsigned a, unsigned b) { return a > b ? a : b; }
+
+static inline __attribute__ ((always_inline))	/* Unsigned minimum.  */
+unsigned min (unsigned a, unsigned b) { return a < b ? a : b; }
+
+#define OPERATION(op, type, N, exp1, exp2) \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+/* OPERATION defines u<op><type><N> (a, b) returning op (exp1, exp2).  */
+OPERATION (max, add, 1, a, a + b)	/* umaxadd1 */
+OPERATION (max, add, 2, a, b + a)	/* umaxadd2 */
+OPERATION (max, add, 3, a + b, a)	/* umaxadd3 */
+OPERATION (max, add, 4, b + a, a)	/* umaxadd4 */
+
+OPERATION (min, add, 1, a, a + b)	/* uminadd1 */
+OPERATION (min, add, 2, a, b + a)	/* uminadd2 */
+OPERATION (min, add, 3, a + b, a)	/* uminadd3 */
+OPERATION (min, add, 4, b + a, a)	/* uminadd4 */
+
+OPERATION (max, sub, 1, a, a - b)	/* umaxsub1 */
+OPERATION (max, sub, 2, a - b, a)	/* umaxsub2 */
+
+OPERATION (min, sub, 1, a, a - b)	/* uminsub1 */
+OPERATION (min, sub, 2, a - b, a)	/* uminsub2 */
+
+/* { dg-final { scan-assembler-not "cmp" } } */