git.ipfire.org Git - thirdparty/gcc.git/commitdiff
rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend
author Haochen Gui <guihaoc@gcc.gnu.org>
Wed, 16 Aug 2023 06:21:09 +0000 (14:21 +0800)
committer Haochen Gui <guihaoc@gcc.gnu.org>
Wed, 16 Aug 2023 06:23:38 +0000 (14:23 +0800)
mfvsrwz has lower latency than xxextractuw or vextuw[lr]x, so it should be
generated even when p9 vector is enabled.  The instruction also already
zero-extends its result, so a combine pattern is needed to eliminate the
redundant zero-extend instructions.

gcc/
PR target/106769
* config/rs6000/vsx.md (expand vsx_extract_<mode>): Set it only
for V8HI and V16QI.
(vsx_extract_v4si): New expand for V4SI extraction.
(vsx_extract_v4si_w1): New insn pattern for V4SI extraction on
word 1 from BE order.
(*mfvsrwz): New insn pattern for mfvsrwz.
(*vsx_extract_<mode>_di_p9): Assert that it won't be generated on
word 1 from BE order.
(*vsx_extract_si): Remove.
(*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2,
3 from BE order.

gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr106769.h: New.
* gcc.target/powerpc/pr106769-p8.c: New.
* gcc.target/powerpc/pr106769-p9.c: New.

gcc/config/rs6000/vsx.md
gcc/testsuite/gcc.target/powerpc/pr106769-p8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/powerpc/pr106769-p9.c [new file with mode: 0644]
gcc/testsuite/gcc.target/powerpc/pr106769.h [new file with mode: 0644]

index 9cd7be23a5fa17cf9ac1b599041060f48ffbcf2f..e4192c008fb9d3b3528d9001d2525e6bd904bec5 100644 (file)
 (define_expand  "vsx_extract_<mode>"
   [(parallel [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
                   (vec_select:<VEC_base>
-                   (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
+                   (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
                    (parallel [(match_operand:QI 2 "const_int_operand")])))
-             (clobber (match_scratch:VSX_EXTRACT_I 3))])]
+             (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
     }
 })
 
+;; Expander for extracting one SImode element (index 0..3) from a V4SI
+;; vector.  "Word 1 (BE order)" is element 1 when BYTES_BIG_ENDIAN and
+;; element 2 otherwise.  With TARGET_P9_VECTOR every other index is emitted
+;; through gen_vsx_extract_v4si_p9; in all remaining cases no insn is
+;; emitted here and the parallel below is left for the insn patterns to
+;; match.
+(define_expand  "vsx_extract_v4si"
+  [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
+                  (vec_select:SI
+                   (match_operand:V4SI 1 "gpc_reg_operand")
+                   (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
+             (clobber (match_scratch:V4SI 3))])]
+  "TARGET_DIRECT_MOVE_64BIT"
+{
+  /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx.  So just
+     fall through to vsx_extract_v4si_w1.  */
+  if (TARGET_P9_VECTOR
+      && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+    {
+      emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
+                                         operands[2]));
+      DONE;
+    }
+})
+
+;; Extract from word 1 (BE order), i.e. element 1 when BYTES_BIG_ENDIAN and
+;; element 2 otherwise.  The four alternatives are:
+;;   0: destination "r"  - mfvsrwz
+;;   1: destination "wa" - xxlor of the source with itself (register copy)
+;;   2: destination "Z"  - stxsiwx store
+;;   3: destination "wa" tied to the source register ("0") - nothing to do,
+;;      only an assembler comment is emitted (length 0).
+(define_insn "vsx_extract_v4si_w1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
+       (vec_select:SI
+        (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+        (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+{
+   if (which_alternative == 0)
+     return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+     return "xxlor %x0,%x1,%x1";
+
+   if (which_alternative == 2)
+     return "stxsiwx %x1,%y0";
+
+   return ASM_COMMENT_START " vec_extract to same register";
+}
+  [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+   (set_attr "length" "4,4,4,0")
+   (set_attr "isa" "p8v,*,p8v,*")])
+
+;; Zero-extend word 1 (BE order) of a V4SI into a DImode GPR with a single
+;; mfvsrwz.  The instruction already zero-extends its result, so matching
+;; the zero_extend here removes the need for a separate extend instruction.
+;; NOTE(review): operand 3 is not referenced by the output template;
+;; presumably the clobber is kept so this pattern has the same shape as the
+;; V4SI extract patterns it is combined from - confirm.
+(define_insn "*mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (zero_extend:DI
+         (vec_select:SI
+           (match_operand:V4SI 1 "vsx_register_operand" "wa")
+           (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
 (define_insn "vsx_extract_<mode>_p9"
   [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,<VSX_EX>")
        (vec_select:<VEC_base>
                    (parallel [(match_dup 2)])))
              (clobber (match_dup 3))])]
 {
+  gcc_assert (<MODE>mode != V4SImode
+             || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2));
+
   operands[4] = gen_rtx_REG (<VEC_base>mode, REGNO (operands[0]));
 }
   [(set_attr "isa" "p9v,*")])
    (set (match_dup 0)
        (match_dup 3))])
 
-(define_insn_and_split  "*vsx_extract_si"
+;; Extract from word 0, 2, 3 (BE order).
+;; The split emits altivec_vspltw_direct into a vector temporary and then
+;; reuses vsx_extract_v4si_w1 on that temporary, so the final move/store is
+;; always a word 1 (BE order) extraction.
+(define_insn_and_split "*vsx_extract_v4si_w023"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
        (vec_select:SI
         (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
    (clobber (match_scratch:V4SI 3 "=v,v,v"))]
-  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
+  "TARGET_DIRECT_MOVE_64BIT"
   "#"
-  "&& reload_completed"
+  "&& INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)"
   [(const_int 0)]
 {
+  /* NOTE(review): with TARGET_P9_VECTOR the vsx_extract_v4si expander emits
+     vsx_extract_v4si_p9 for these indexes, so this split is presumably
+     never expected to run there; the assert enforces that.  */
+  gcc_assert (!TARGET_P9_VECTOR);
+
   rtx dest = operands[0];
   rtx src = operands[1];
   rtx element = operands[2];
-  rtx vec_tmp = operands[3];
-  int value;
+  rtx vec_tmp;
+
+  /* Operand 3 may still be a SCRATCH when the split runs; allocate a new
+     V4SI pseudo in that case, otherwise use the register already there.  */
+  if (GET_CODE (operands[3]) == SCRATCH)
+    vec_tmp = gen_reg_rtx (V4SImode);
+  else
+    vec_tmp = operands[3];
 
   /* Adjust index for LE element ordering, the below minuend 3 is computed by
      GET_MODE_NUNITS (V4SImode) - 1.  */
   if (!BYTES_BIG_ENDIAN)
     element = GEN_INT (3 - INTVAL (element));
 
-  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
-     instruction.  */
-  value = INTVAL (element);
-  if (value != 1)
-    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
-  else
-    vec_tmp = src;
+  emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
 
-  if (MEM_P (operands[0]))
-    {
-      if (can_create_pseudo_p ())
-       dest = rs6000_force_indexed_or_indirect_mem (dest);
-
-      if (TARGET_P8_VECTOR)
-       emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
-      else
-       emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
-    }
-
-  else if (TARGET_P8_VECTOR)
-    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
-  else
-    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
-                   gen_rtx_REG (DImode, REGNO (vec_tmp)));
+  /* Finish with the word 1 (BE order) extraction from the temporary; the
+     index passed is the one vsx_extract_v4si_w1 requires in its insn
+     condition.  */
+  int value = BYTES_BIG_ENDIAN ? 1 : 2;
+  emit_insn (gen_vsx_extract_v4si_w1 (dest, vec_tmp, GEN_INT (value)));
 
   DONE;
-}
-  [(set_attr "type" "mfvsr,vecperm,fpstore")
-   (set_attr "length" "8")
-   (set_attr "isa" "*,p8v,*")])
+})
 
 (define_insn_and_split  "*vsx_extract_<mode>_p8"
   [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p8.c
new file mode 100644 (file)
index 0000000..e7cdbc7
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c b/gcc/testsuite/gcc.target/powerpc/pr106769-p9.c
new file mode 100644 (file)
index 0000000..2248b52
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr106769.h"
+
+/* { dg-final { scan-assembler {\mmfvsrwz\M} } } */
+/* { dg-final { scan-assembler {\mstxsiwx\M} } } */
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
+/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */
+/* { dg-final { scan-assembler-not {\mvextuw[rl]x\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106769.h b/gcc/testsuite/gcc.target/powerpc/pr106769.h
new file mode 100644 (file)
index 0000000..1c8c8a0
--- /dev/null
@@ -0,0 +1,17 @@
+#include <altivec.h>
+
+#ifdef __BIG_ENDIAN__
+#define LANE 1
+#else
+#define LANE 2
+#endif
+
+unsigned int foo1 (vector unsigned int v)
+{
+   return vec_extract(v, LANE);  /* LANE: 1 on BE, 2 on LE; value is returned.  */
+}
+
+void foo2 (vector unsigned int v, unsigned int* p)
+{
+   *p = vec_extract(v, LANE);  /* LANE: 1 on BE, 2 on LE; value is stored via p.  */
+}