vector.md (vec_realign_load<mode>): Generate vperm directly to circumvent subtract...
author     Bill Schmidt <wschmidt@linux.vnet.ibm.com>
           Fri, 11 Oct 2013 18:52:07 +0000 (18:52 +0000)
committer  William Schmidt <wschmidt@gcc.gnu.org>
           Fri, 11 Oct 2013 18:52:07 +0000 (18:52 +0000)
2013-10-11  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

* config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm
directly to circumvent subtract from splat{31} workaround.
* config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
prototype.
* config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
* config/rs6000/altivec.md (define_c_enum "unspec"): Add
UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
(altivec_vperm_<mode>): Convert to define_insn_and_split to
separate big and little endian logic.
(*altivec_vperm_<mode>_internal): New define_insn.
(altivec_vperm_<mode>_uns): Convert to define_insn_and_split to
separate big and little endian logic.
(*altivec_vperm_<mode>_uns_internal): New define_insn.
(vec_permv16qi): Add little endian logic.

From-SVN: r203457

gcc/ChangeLog
gcc/config/rs6000/altivec.md
gcc/config/rs6000/rs6000-protos.h
gcc/config/rs6000/rs6000.c
gcc/config/rs6000/vector.md
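
A note on the "subtract from splat{31}" workaround mentioned in the ChangeLog: the little-endian expansion needs permute-control bytes of 31 - s, but vspltisb only accepts immediates in the range -16..15, so 31 cannot be splatted directly. Splatting -1 and subtracting is equivalent because vperm reads only the low five bits of each control byte. A standalone check of that equivalence (illustrative C, not part of the patch):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  /* (0xff - s) and (31 - s) agree in their low five bits for every
     byte value s, so splatting -1 substitutes for splatting 31.  */
  for (int s = 0; s < 256; s++)
    assert (((uint8_t) (0xff - s) & 31) == ((31 - s) & 31));
  return 0;
}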

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a5bde2c900e7584d14ae49d31974f1ef7e403a0b..5a6fff651cf8c8a5711b50d9230ee322041675ad 100644
@@ -1,3 +1,20 @@
+2013-10-11  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+       * config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm
+       directly to circumvent subtract from splat{31} workaround.
+       * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
+       prototype.
+       * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
+       * config/rs6000/altivec.md (define_c_enum "unspec"): Add
+       UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
+       (altivec_vperm_<mode>): Convert to define_insn_and_split to
+       separate big and little endian logic.
+       (*altivec_vperm_<mode>_internal): New define_insn.
+       (altivec_vperm_<mode>_uns): Convert to define_insn_and_split to
+       separate big and little endian logic.
+       (*altivec_vperm_<mode>_uns_internal): New define_insn.
+       (vec_permv16qi): Add little endian logic.
+
 2013-10-11  Marc Glisse  <marc.glisse@inria.fr>
 
        * doc/extend.texi (returns_nonnull): Remove arguments.
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 47a3a6894e15ef17e2775bc889b1ff74ddeace48..49f908161dfb4725c9fdfa69ba51fa2efd4038f8 100644
@@ -59,6 +59,8 @@
    UNSPEC_VSUMSWS
    UNSPEC_VPERM
    UNSPEC_VPERM_UNS
+   UNSPEC_VPERM_X
+   UNSPEC_VPERM_UNS_X
    UNSPEC_VRFIN
    UNSPEC_VCFUX
    UNSPEC_VCFSX
   "vrfiz %0,%1"
   [(set_attr "type" "vecfloat")])
 
-(define_insn "altivec_vperm_<mode>"
+(define_insn_and_split "altivec_vperm_<mode>"
   [(set (match_operand:VM 0 "register_operand" "=v")
        (unspec:VM [(match_operand:VM 1 "register_operand" "v")
                    (match_operand:VM 2 "register_operand" "v")
                    (match_operand:V16QI 3 "register_operand" "v")]
+                  UNSPEC_VPERM_X))]
+  "TARGET_ALTIVEC"
+  "#"
+  "!reload_in_progress && !reload_completed"
+  [(set (match_dup 0) (match_dup 4))]
+{
+  if (BYTES_BIG_ENDIAN)
+    operands[4] = gen_rtx_UNSPEC (<MODE>mode,
+                                  gen_rtvec (3, operands[1],
+                                            operands[2], operands[3]),
+                                  UNSPEC_VPERM);
+  else
+    {
+      /* We want to subtract from 31, but we can't vspltisb 31 since
+         it's out of range.  -1 works as well because only the low-order
+         five bits of the permute control vector elements are used.  */
+      rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
+                                         gen_rtx_CONST_INT (QImode, -1));
+      rtx tmp = gen_reg_rtx (V16QImode);
+      emit_move_insn (tmp, splat);
+      rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
+      emit_move_insn (tmp, sel);
+      operands[4] = gen_rtx_UNSPEC (<MODE>mode,
+                                    gen_rtvec (3, operands[2],
+                                              operands[1], tmp),
+                                   UNSPEC_VPERM);
+    }
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vperm_<mode>_internal"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+                   (match_operand:VM 2 "register_operand" "v")
+                   (match_operand:V16QI 3 "register_operand" "+v")]
                   UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
   "vperm %0,%1,%2,%3"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vperm_<mode>_uns"
+(define_insn_and_split "altivec_vperm_<mode>_uns"
   [(set (match_operand:VM 0 "register_operand" "=v")
        (unspec:VM [(match_operand:VM 1 "register_operand" "v")
                    (match_operand:VM 2 "register_operand" "v")
                    (match_operand:V16QI 3 "register_operand" "v")]
+                  UNSPEC_VPERM_UNS_X))]
+  "TARGET_ALTIVEC"
+  "#"
+  "!reload_in_progress && !reload_completed"
+  [(set (match_dup 0) (match_dup 4))]
+{
+  if (BYTES_BIG_ENDIAN)
+    operands[4] = gen_rtx_UNSPEC (<MODE>mode,
+                                  gen_rtvec (3, operands[1],
+                                            operands[2], operands[3]),
+                                  UNSPEC_VPERM_UNS);
+  else
+    {
+      /* We want to subtract from 31, but we can't vspltisb 31 since
+         it's out of range.  -1 works as well because only the low-order
+         five bits of the permute control vector elements are used.  */
+      rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
+                                         gen_rtx_CONST_INT (QImode, -1));
+      rtx tmp = gen_reg_rtx (V16QImode);
+      emit_move_insn (tmp, splat);
+      rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
+      emit_move_insn (tmp, sel);
+      operands[4] = gen_rtx_UNSPEC (<MODE>mode,
+                                    gen_rtvec (3, operands[2],
+                                              operands[1], tmp),
+                                   UNSPEC_VPERM_UNS);
+    }
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vperm_<mode>_uns_internal"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+       (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+                   (match_operand:VM 2 "register_operand" "v")
+                   (match_operand:V16QI 3 "register_operand" "+v")]
                   UNSPEC_VPERM_UNS))]
   "TARGET_ALTIVEC"
   "vperm %0,%1,%2,%3"
                       (match_operand:V16QI 3 "register_operand" "")]
                      UNSPEC_VPERM))]
   "TARGET_ALTIVEC"
-  "")
+{
+  if (!BYTES_BIG_ENDIAN) {
+    altivec_expand_vec_perm_le (operands);
+    DONE;
+  }
+})
 
 (define_expand "vec_perm_constv16qi"
   [(match_operand:V16QI 0 "register_operand" "")
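
The little-endian arm of the splits above reverses the two data inputs and subtracts the selector from a splat of -1 before emitting the real vperm. A scalar sketch of what that sequence computes (the function names are invented for illustration, and the model simply mirrors the operations the split emits):

#include <stdint.h>
#include <string.h>

/* Model of the vperm instruction: result byte i takes byte
   (sel[i] & 31) from the 32-byte concatenation of the two inputs.  */
static void
vperm_model (uint8_t dst[16], const uint8_t a[16], const uint8_t b[16],
             const uint8_t sel[16])
{
  uint8_t cat[32];
  memcpy (cat, a, 16);
  memcpy (cat + 16, b, 16);
  for (int i = 0; i < 16; i++)
    dst[i] = cat[sel[i] & 31];
}

/* What the little-endian split emits: splat -1, subtract the original
   selector from it, then permute with the data inputs swapped.  */
static void
vperm_le_model (uint8_t dst[16], const uint8_t op1[16],
                const uint8_t op2[16], const uint8_t sel[16])
{
  uint8_t adj[16];
  for (int i = 0; i < 16; i++)
    adj[i] = (uint8_t) (0xff - sel[i]);   /* splat (-1) - sel */
  vperm_model (dst, op2, op1, adj);       /* inputs reversed */
}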
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 9aa9429b83c546fa29b189164f9f974c8c3c07cd..1fc890340947e2c0711a6be8d2ed0b31e9d95ffa 100644
@@ -56,6 +56,7 @@ extern void paired_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
 extern void rs6000_expand_vector_extract (rtx, rtx, int);
 extern bool altivec_expand_vec_perm_const (rtx op[4]);
+extern void altivec_expand_vec_perm_le (rtx op[4]);
 extern bool rs6000_expand_vec_perm_const (rtx op[4]);
 extern void rs6000_expand_extract_even (rtx, rtx, rtx);
 extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0cf6ed87f0c14a2325c1abc75bd4ca488a4457a5..3f56136d983f6dbe52ec2d852a0f94c2bf7c8eea 100644
@@ -28608,6 +28608,54 @@ altivec_expand_vec_perm_const_le (rtx operands[4])
   emit_move_insn (target, unspec);
 }
 
+/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
+   permute control vector.  But here it's not a constant, so we must
+   generate a vector splat/subtract to do the adjustment.  */
+
+void
+altivec_expand_vec_perm_le (rtx operands[4])
+{
+  rtx splat, unspec;
+  rtx target = operands[0];
+  rtx op0 = operands[1];
+  rtx op1 = operands[2];
+  rtx sel = operands[3];
+  rtx tmp = target;
+
+  /* Get everything in regs so the pattern matches.  */
+  if (!REG_P (op0))
+    op0 = force_reg (V16QImode, op0);
+  if (!REG_P (op1))
+    op1 = force_reg (V16QImode, op1);
+  if (!REG_P (sel))
+    sel = force_reg (V16QImode, sel);
+  if (!REG_P (target))
+    tmp = gen_reg_rtx (V16QImode);
+
+  /* SEL = splat(31) - SEL.  */
+  /* We want to subtract from 31, but we can't vspltisb 31 since
+     it's out of range.  -1 works as well because only the low-order
+     five bits of the permute control vector elements are used.  */
+  splat = gen_rtx_VEC_DUPLICATE (V16QImode,
+                                gen_rtx_CONST_INT (QImode, -1));
+  emit_move_insn (tmp, splat);
+  sel = gen_rtx_MINUS (V16QImode, tmp, sel);
+  emit_move_insn (tmp, sel);
+
+  /* Permute with operands reversed and adjusted selector.  */
+  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, tmp),
+                          UNSPEC_VPERM);
+
+  /* Copy into target, possibly by way of a register.  */
+  if (!REG_P (target))
+    {
+      emit_move_insn (tmp, unspec);
+      unspec = tmp;
+    }
+
+  emit_move_insn (target, unspec);
+}
+
 /* Expand an Altivec constant permutation.  Return true if we match
    an efficient implementation; false to fall back to VPERM.  */
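
At the source level, the usual way to reach the new altivec_expand_vec_perm_le path is a variable two-input shuffle, which GCC generally expands through the vec_perm optab and hence the vec_permv16qi pattern above. A minimal example (the function name is arbitrary; assumes a little-endian PowerPC target with AltiVec enabled):

typedef unsigned char v16qi __attribute__ ((vector_size (16)));

/* Variable byte shuffle of two vectors; the selector is not a
   compile-time constant, so the generic vec_perm expansion is used.  */
v16qi
shuffle_bytes (v16qi a, v16qi b, v16qi sel)
{
  return __builtin_shuffle (a, b, sel);
}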
 
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index cbb1f4f8e7246a6c142fa2503d3729a65189e5b3..e88d879591e3e27b755dfc002fb17673444c384c 100644
     emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
                                         operands[2], operands[3]));
   else
-    emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[2],
-                                        operands[1], operands[3]));
+    {
+      /* Avoid the "subtract from splat31" workaround for vperm since
+         we have changed lvsr to lvsl instead.  */
+      rtx unspec = gen_rtx_UNSPEC (<MODE>mode,
+                                   gen_rtvec (3, operands[2],
+                                              operands[1], operands[3]),
+                                   UNSPEC_VPERM);
+      emit_move_insn (operands[0], unspec);
+    }
   DONE;
 })
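
For context, vec_realign_load<mode> backs the classic AltiVec misaligned-load sequence: two aligned loads plus a vperm whose control vector comes from lvsl/lvsr on the pointer's low bits. A rough intrinsics-level sketch of the big-endian idiom (illustrative only; the hunk above changes how the little-endian case builds the vperm so the splat/subtract adjustment is not applied on top of the lvsl-based mask):

#include <altivec.h>

/* Load 16 bytes from a possibly misaligned pointer: fetch the two
   aligned quadwords that cover them, build a permute mask from the
   misalignment with lvsl, and merge the pieces with vperm.  */
vector unsigned char
load_realigned (const unsigned char *p)
{
  vector unsigned char lo   = vec_ld (0, p);
  vector unsigned char hi   = vec_ld (15, p);
  vector unsigned char mask = vec_lvsl (0, p);
  return vec_perm (lo, hi, mask);
}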