From: Kewen Lin <linkw@linux.ibm.com>
Date: Mon, 14 Dec 2020 09:38:49 +0000 (-0600)
Subject: rs6000: Use rldimi for vec init instead of shift + ior
X-Git-Tag: basepoints/gcc-12~919
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f4a3cea3fb025735c09af5c7f14d61152c4c5794;p=thirdparty%2Fgcc.git

rs6000: Use rldimi for vec init instead of shift + ior

This patch teaches vector int init (vsx_init_v4si) to use rldimi to
merge two integers instead of a shift and an ior.  It also adds one
required splitter written by Segher.

An rl*imi is usually written as an IOR of an ASHIFT or similar, and
an AND of a register with a constant mask.  In some cases combine
knows that the AND is a no-op (because all zero bits in the mask
correspond to bits already known to be zero) and removes it, and then
no pattern matches.  This patch adds a define_split for such cases.

The splitter uses nonzero_bits in its condition, but the insn it
produces has an explicit AND mask, so nonzero_bits is not needed
afterwards for the instruction to be recognised.  This is necessary
because later passes can see fewer nonzero_bits.

Because it is a splitter, combine will only use it when starting with
three insns (or more), even though the result is just one.  This isn't
a huge problem in practice, but some possible combinations still won't
happen.

Bootstrapped/regtested on powerpc64le-linux-gnu P9 and
powerpc64-linux-gnu P8, also SPEC2017 build/run passed on P9.

gcc/ChangeLog:

2020-02-23  Segher Boessenkool  <segher@kernel.crashing.org>
	    Kewen Lin  <linkw@gcc.gnu.org>

	* config/rs6000/rs6000.md (*rotl<mode>3_insert_3): Renamed to...
	(rotl<mode>3_insert_3): ...this.
	(plus_ior_xor): New code_iterator.
	(define_split for GPR rl*imi): New splitter.
	* config/rs6000/vsx.md (vsx_init_v4si): Use gen_rotldi3_insert_3
	for integer merging.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/vec-init-10.c: New test.
---

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a1315523fecd..543a6cdd8835 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4068,7 +4068,7 @@
   [(set_attr "type" "insert")])
 
 ; There are also some forms without one of the ANDs.
-(define_insn "*rotl<mode>3_insert_3"
+(define_insn "rotl<mode>3_insert_3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 	(ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0")
 			  (match_operand:GPR 4 "const_int_operand" "n"))
@@ -4083,6 +4083,24 @@
 }
   [(set_attr "type" "insert")])
 
+(define_code_iterator plus_ior_xor [plus ior xor])
+
+(define_split
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+	(plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
+				      (match_operand:SI 2 "const_int_operand"))
+			  (match_operand:GPR 3 "gpc_reg_operand")))]
+  "nonzero_bits (operands[3], <MODE>mode)
+   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
+  [(set (match_dup 0)
+	(ior:GPR (and:GPR (match_dup 3)
+			  (match_dup 4))
+		 (ashift:GPR (match_dup 1)
+			     (match_dup 2))))]
+{
+  operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
+})
+
 (define_insn "*rotl<mode>3_insert_4"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 	(ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3e0518631dfb..ad673968584f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3030,28 +3030,22 @@
    (use (match_operand:SI 4 "gpc_reg_operand"))]
    "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
 {
-  rtx a = gen_reg_rtx (DImode);
-  rtx b = gen_reg_rtx (DImode);
-  rtx c = gen_reg_rtx (DImode);
-  rtx d = gen_reg_rtx (DImode);
-  emit_insn (gen_zero_extendsidi2 (a, operands[1]));
-  emit_insn (gen_zero_extendsidi2 (b, operands[2]));
-  emit_insn (gen_zero_extendsidi2 (c, operands[3]));
-  emit_insn (gen_zero_extendsidi2 (d, operands[4]));
+  rtx a = gen_lowpart_SUBREG (DImode, operands[1]);
+  rtx b = gen_lowpart_SUBREG (DImode, operands[2]);
+  rtx c = gen_lowpart_SUBREG (DImode, operands[3]);
+  rtx d = gen_lowpart_SUBREG (DImode, operands[4]);
   if (!BYTES_BIG_ENDIAN)
     {
       std::swap (a, b);
       std::swap (c, d);
     }
 
-  rtx aa = gen_reg_rtx (DImode);
   rtx ab = gen_reg_rtx (DImode);
-  rtx cc = gen_reg_rtx (DImode);
   rtx cd = gen_reg_rtx (DImode);
-  emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
-  emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
-  emit_insn (gen_iordi3 (ab, aa, b));
-  emit_insn (gen_iordi3 (cd, cc, d));
+  emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b,
+				   GEN_INT (0xffffffff)));
+  emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d,
+				   GEN_INT (0xffffffff)));
 
   rtx abcd = gen_reg_rtx (V2DImode);
   emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-init-10.c b/gcc/testsuite/gcc.target/powerpc/vec-init-10.c
new file mode 100644
index 000000000000..23587b3ac05a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-init-10.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* Check that we can optimize sldi + or to rldimi for vector int init.  */
+
+vector unsigned int
+testu (unsigned int i1, unsigned int i2, unsigned int i3, unsigned int i4)
+{
+  vector unsigned int v = {i1, i2, i3, i4};
+  return v;
+}
+
+vector signed int
+tests (signed int i1, signed int i2, signed int i3, signed int i4)
+{
+  vector signed int v = {i1, i2, i3, i4};
+  return v;
+}
+
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-not {\mor\M} } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 4 } } */