git.ipfire.org Git - thirdparty/gcc.git/commitdiff
PR target/91681: zero_extendditi2 pattern for more optimizations on x86.
author Roger Sayle <roger@nextmovesoftware.com>
Mon, 25 Jul 2022 16:33:48 +0000 (17:33 +0100)
committer Roger Sayle <roger@nextmovesoftware.com>
Mon, 25 Jul 2022 16:33:48 +0000 (17:33 +0100)
Technically, PR target/91681 has already been resolved; we now recognize the
highpart multiplication at the tree-level, we no longer use the stack, and
we currently generate the same number of instructions as LLVM.  However, it
is still possible to do better: the current x86_64 code to generate a double
word addition of a zero-extended operand looks like:

        xorl    %r11d, %r11d
        addq    %r10, %rax
        adcq    %r11, %rdx

when it's possible (as LLVM does) to use an immediate constant:

        addq    %r10, %rax
        adcq    $0, %rdx

This is implemented by introducing a zero_extendditi2 pattern,
for zero extension from DImode to TImode on TARGET_64BIT that is
split after reload.  With zero extension now visible to combine,
we add two new define_insn_and_split that add/subtract a zero
extended operand in double word mode.  These apply to both 32-bit
and 64-bit code generation, to produce adc $0 and sbb $0.

One consequence of this is that these new patterns interfere with
the optimization that recognizes DW:DI = (HI:SI<<32)+LO:SI as a pair
of register moves, or more accurately the combine splitter no longer
triggers as we're now converting two instructions into two instructions
(not three instructions into two instructions).  This is easily
repaired (and extended to handle TImode) by changing from a pair
of define_split (that handle operand commutativity) to a set of
four define_insn_and_split (again to handle operand commutativity).

2022-07-25  Roger Sayle  <roger@nextmovesoftware.com>
    Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog
PR target/91681
* config/i386/i386-expand.cc (split_double_concat): A new helper
function for setting a double word value from two word values.
* config/i386/i386-protos.h (split_double_concat): Prototype here.
* config/i386/i386.md (zero_extendditi2): New define_insn_and_split.
(*add<dwi>3_doubleword_zext): New define_insn_and_split.
(*sub<dwi>3_doubleword_zext): New define_insn_and_split.
(*concat<mode><dwi>3_1): New define_insn_and_split replacing
previous define_split for implementing DST = (HI<<32)|LO as
pair of move instructions, setting lopart and hipart.
(*concat<mode><dwi>3_2): Likewise.
(*concat<mode><dwi>3_3): Likewise, where HI is zero_extended.
(*concat<mode><dwi>3_4): Likewise, where HI is zero_extended.

gcc/testsuite/ChangeLog
PR target/91681
* g++.target/i386/pr91681.C: New test case (from the PR).
* gcc.target/i386/pr91681-1.c: New int128 test case.
* gcc.target/i386/pr91681-2.c: Likewise.
* gcc.target/i386/pr91681-3.c: Likewise, but for ia32.

gcc/config/i386/i386-expand.cc
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.md
gcc/testsuite/g++.target/i386/pr91681.C [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr91681-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr91681-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr91681-3.c [new file with mode: 0644]

index 40f821e7a11f266ccec179d74bfe1adb20a2b3d5..66d8f28984c85de0a441da1dc82254da0823cbc3 100644 (file)
@@ -165,6 +165,46 @@ split_double_mode (machine_mode mode, rtx operands[],
     }
 }
 
+/* Emit the double word assignment DST = { LO, HI }.  */
+
+void
+split_double_concat (machine_mode mode, rtx dst, rtx lo, rtx hi)
+{
+  rtx dlo, dhi;
+  int deleted_move_count = 0;  /* Word moves skipped because source equals destination.  */
+  split_double_mode (mode, &dst, 1, &dlo, &dhi);  /* DLO/DHI: the two words of DST.  */
+  if (!rtx_equal_p (dlo, hi))  /* DLO does not equal HI, so DLO is written first.  */
+    {
+      if (!rtx_equal_p (dlo, lo))
+       emit_move_insn (dlo, lo);
+      else
+       deleted_move_count++;
+      if (!rtx_equal_p (dhi, hi))
+       emit_move_insn (dhi, hi);
+      else
+       deleted_move_count++;
+    }
+  else if (!rtx_equal_p (lo, dhi))  /* DLO equals HI but DHI does not equal LO: write DHI first.  */
+    {
+      if (!rtx_equal_p (dhi, hi))
+       emit_move_insn (dhi, hi);
+      else
+       deleted_move_count++;
+      if (!rtx_equal_p (dlo, lo))
+       emit_move_insn (dlo, lo);
+      else
+       deleted_move_count++;
+    }
+  else if (mode == TImode)  /* DLO equals HI and DHI equals LO: exchange the two words.  */
+    emit_insn (gen_swapdi (dlo, dhi));
+  else
+    emit_insn (gen_swapsi (dlo, dhi));
+
+  if (deleted_move_count == 2)  /* Neither word move was emitted.  */
+    emit_note (NOTE_INSN_DELETED);  /* NOTE(review): presumably keeps the emitted sequence non-empty -- confirm.  */
+}
+
+
 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
    for the target.  */
 
index cf847751ac5cde4880b0af5e914643c2debc501e..e27c14ff78316fe8100b0d48ac85b26913f7df85 100644 (file)
@@ -85,6 +85,7 @@ extern void print_reg (rtx, int, FILE*);
 extern void ix86_print_operand (FILE *, rtx, int);
 
 extern void split_double_mode (machine_mode, rtx[], int, rtx[], rtx[]);
+extern void split_double_concat (machine_mode, rtx, rtx lo, rtx);
 
 extern const char *output_set_got (rtx, rtx);
 extern const char *output_387_binary_op (rtx_insn *, rtx*);
index 9aaeb695f0fcdaed89409cc2f15faf81541c37f3..fab6aed5e42598cae2c211033c8b48d433961b55 100644 (file)
 
 ;; Zero extension instructions
 
+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+       (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3) (match_dup 1))
+   (set (match_dup 4) (const_int 0))]
+  "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);")
+
 (define_expand "zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand")
        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))])
     }
 })
 
+(define_insn_and_split "*add<dwi>3_doubleword_zext"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+       (plus:<DWI>
+         (zero_extend:<DWI>
+           (match_operand:DWIH 2 "nonimmediate_operand" "rm,r")) 
+         (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (reg:CCC FLAGS_REG)
+                  (compare:CCC
+                    (plus:DWIH (match_dup 1) (match_dup 2))
+                    (match_dup 1)))
+             (set (match_dup 0)
+                  (plus:DWIH (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+                  (plus:DWIH
+                    (plus:DWIH
+                      (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+                      (match_dup 4))
+                    (const_int 0)))
+             (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+
 (define_insn "*add<mode>_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
        (plus:SWI48
     }
 })
 
+(define_insn_and_split "*sub<dwi>3_doubleword_zext"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+       (minus:<DWI>
+         (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
+         (zero_extend:<DWI>
+           (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG)
+                  (compare:CC (match_dup 1) (match_dup 2)))
+             (set (match_dup 0)
+                  (minus:DWIH (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+                  (minus:DWIH
+                    (minus:DWIH
+                      (match_dup 4)
+                      (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
+                    (const_int 0)))
+             (clobber (reg:CC FLAGS_REG))])]
+  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+
 (define_insn "*sub<mode>_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
        (minus:SWI
 
 ;; Split DST = (HI<<32)|LO early to minimize register usage.
 (define_code_iterator any_or_plus [plus ior xor])
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-       (any_or_plus:DI
-         (ashift:DI (match_operand:DI 1 "register_operand")
-                    (const_int 32))
-         (zero_extend:DI (match_operand:SI 2 "register_operand"))))]
-  "!TARGET_64BIT"
-  [(set (match_dup 3) (match_dup 4))
-   (set (match_dup 5) (match_dup 2))]
+(define_insn_and_split "*concat<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+       (any_or_plus:<DWI>
+         (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
+                       (match_operand:<DWI> 2 "const_int_operand"))
+         (zero_extend:<DWI> (match_operand:DWIH 3 "register_operand" "r"))))]
+  "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(clobber (const_int 0))]
 {
-  operands[3] = gen_highpart (SImode, operands[0]);
-  operands[4] = gen_lowpart (SImode, operands[1]);
-  operands[5] = gen_lowpart (SImode, operands[0]);
+  split_double_concat (<DWI>mode, operands[0], operands[3],
+                      gen_lowpart (<MODE>mode, operands[1]));
+  DONE;
 })
 
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-       (any_or_plus:DI
-         (zero_extend:DI (match_operand:SI 1 "register_operand"))
-         (ashift:DI (match_operand:DI 2 "register_operand")
-                    (const_int 32))))]
-  "!TARGET_64BIT"
-  [(set (match_dup 3) (match_dup 4))
-   (set (match_dup 5) (match_dup 1))]
+(define_insn_and_split "*concat<mode><dwi>3_2"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+       (any_or_plus:<DWI>
+         (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
+         (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r")
+                       (match_operand:<DWI> 3 "const_int_operand"))))]
+  "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(clobber (const_int 0))]
+{
+  split_double_concat (<DWI>mode, operands[0], operands[1],
+                      gen_lowpart (<MODE>mode, operands[2]));
+  DONE;
+})
+
+(define_insn_and_split "*concat<mode><dwi>3_3"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+       (any_or_plus:<DWI>
+         (ashift:<DWI>
+           (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
+           (match_operand:<DWI> 2 "const_int_operand"))
+         (zero_extend:<DWI> (match_operand:DWIH 3 "register_operand" "r"))))]
+  "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(clobber (const_int 0))]
 {
-  operands[3] = gen_highpart (SImode, operands[0]);
-  operands[4] = gen_lowpart (SImode, operands[2]);
-  operands[5] = gen_lowpart (SImode, operands[0]);
+  split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
+  DONE;
+})
+
+(define_insn_and_split "*concat<mode><dwi>3_4"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+       (any_or_plus:<DWI>
+         (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
+         (ashift:<DWI>
+           (zero_extend:<DWI> (match_operand:DWIH 2 "register_operand" "r"))
+           (match_operand:<DWI> 3 "const_int_operand"))))]
+  "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(clobber (const_int 0))]
+{
+  split_double_concat (<DWI>mode, operands[0], operands[1], operands[2]);
+  DONE;
 })
 \f
 ;; Negation instructions
diff --git a/gcc/testsuite/g++.target/i386/pr91681.C b/gcc/testsuite/g++.target/i386/pr91681.C
new file mode 100644 (file)
index 0000000..0271e43
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+
+void multiply128x64x2_3 ( 
+    const unsigned long a, 
+    const unsigned long b, 
+    const unsigned long c, 
+    const unsigned long d, 
+    __uint128_t o[2])
+{
+    __uint128_t B0 = (__uint128_t) b * c;
+    __uint128_t B2 = (__uint128_t) a * c;
+    __uint128_t B1 = (__uint128_t) b * d;
+    __uint128_t B3 = (__uint128_t) a * d;
+
+    o[0] = B2 + (B0 >> 64);
+    o[1] = B3 + (B1 >> 64);
+}
+
+/* { dg-final { scan-assembler-not "xor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr91681-1.c b/gcc/testsuite/gcc.target/i386/pr91681-1.c
new file mode 100644 (file)
index 0000000..ab83cc4
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+unsigned __int128 m;
+
+unsigned __int128 foo(unsigned __int128 x, unsigned long long y)
+{
+    return x + y;
+}
+
+void bar(unsigned __int128 x, unsigned long long y)
+{
+    m = x + y;
+}
+
+void baz(unsigned long long y)
+{
+    m += y;
+}
+
+/* { dg-final { scan-assembler-not "xor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr91681-2.c b/gcc/testsuite/gcc.target/i386/pr91681-2.c
new file mode 100644 (file)
index 0000000..ea52c72
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+unsigned __int128 m;
+
+unsigned __int128 foo(unsigned __int128 x, unsigned long long y)
+{
+    return x - y;
+}
+
+void bar(unsigned __int128 x, unsigned long long y)
+{
+    m = x - y;
+}
+
+void baz(unsigned long long y)
+{
+    m -= y;
+}
+
+/* { dg-final { scan-assembler-not "xor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr91681-3.c b/gcc/testsuite/gcc.target/i386/pr91681-3.c
new file mode 100644 (file)
index 0000000..22a03c2
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2" } */
+
+unsigned long long m;
+
+unsigned long long foo(unsigned long long x, unsigned int y)
+{
+    return x - y;
+}
+
+void bar(unsigned long long x, unsigned int y)
+{
+    m = x - y;
+}
+
+/* { dg-final { scan-assembler-not "xor" } } */