rs6000.c (altivec_expand_vec_perm_const_le): New.

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Mon, 7 Oct 2013 13:01:28 +0000 (13:01 +0000)

committer William Schmidt <wschmidt@gcc.gnu.org>

Mon, 7 Oct 2013 13:01:28 +0000 (13:01 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Mon, 7 Oct 2013 13:01:28 +0000 (13:01 +0000)
committer William Schmidt <wschmidt@gcc.gnu.org>
Mon, 7 Oct 2013 13:01:28 +0000 (13:01 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index f025d8319685aeea16c86b9e28dde0d78c239b9e..aa0325f0e964b51ffdfabec5dee2a36570fff00e 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2013-10-07  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+       * config/rs6000/rs6000.c (altivec_expand_vec_perm_const_le): New.
+       (altivec_expand_vec_perm_const): Call it.
+
  2013-10-07  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
  
         * config/rs6000/vector.md (mov<mode>): Emit permuted move
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c

index a2028e2c33c5fa6f2decd6282dfc347b620652c2..0cf6ed87f0c14a2325c1abc75bd4ca488a4457a5 100644 (file)
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -28526,6 +28526,88 @@ rs6000_emit_parity (rtx dst, rtx src)
      }
  }
  
+/* Expand an Altivec constant permutation for little endian mode.
+   There are two issues: First, the two input operands must be
+   swapped so that together they form a double-wide array in LE
+   order.  Second, the vperm instruction has surprising behavior
+   in LE mode:  it interprets the elements of the source vectors
+   in BE mode ("left to right") and interprets the elements of
+   the destination vector in LE mode ("right to left").  To
+   correct for this, we must subtract each element of the permute
+   control vector from 31.
+
+   For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
+   with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
+   We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
+   serve as the permute control vector.  Then, in BE mode,
+
+     vperm 9,10,11,12
+
+   places the desired result in vr9.  However, in LE mode the 
+   vector contents will be
+
+     vr10 = 00000003 00000002 00000001 00000000
+     vr11 = 00000007 00000006 00000005 00000004
+
+   The result of the vperm using the same permute control vector is
+
+     vr9  = 05000000 07000000 01000000 03000000
+
+   That is, the leftmost 4 bytes of vr10 are interpreted as the
+   source for the rightmost 4 bytes of vr9, and so on.
+
+   If we change the permute control vector to
+
+     vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
+
+   and issue
+
+     vperm 9,11,10,12
+
+   we get the desired
+
+   vr9  = 00000006 00000004 00000002 00000000.  */
+
+void
+altivec_expand_vec_perm_const_le (rtx operands[4])
+{
+  unsigned int i;
+  rtx perm[16];
+  rtx constv, unspec;
+  rtx target = operands[0];
+  rtx op0 = operands[1];
+  rtx op1 = operands[2];
+  rtx sel = operands[3];
+
+  /* Unpack and adjust the constant selector.  */
+  for (i = 0; i < 16; ++i)
+    {
+      rtx e = XVECEXP (sel, 0, i);
+      unsigned int elt = 31 - (INTVAL (e) & 31);
+      perm[i] = GEN_INT (elt);
+    }
+
+  /* Expand to a permute, swapping the inputs and using the
+     adjusted selector.  */
+  if (!REG_P (op0))
+    op0 = force_reg (V16QImode, op0);
+  if (!REG_P (op1))
+    op1 = force_reg (V16QImode, op1);
+
+  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+  constv = force_reg (V16QImode, constv);
+  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
+                          UNSPEC_VPERM);
+  if (!REG_P (target))
+    {
+      rtx tmp = gen_reg_rtx (V16QImode);
+      emit_move_insn (tmp, unspec);
+      unspec = tmp;
+    }
+
+  emit_move_insn (target, unspec);
+}
+
  /* Expand an Altivec constant permutation.  Return true if we match
     an efficient implementation; false to fall back to VPERM.  */
  
@@ -28706,6 +28788,12 @@ altivec_expand_vec_perm_const (rtx operands[4])
         }
      }
  
+  if (!BYTES_BIG_ENDIAN)
+    {
+      altivec_expand_vec_perm_const_le (operands);
+      return true;
+    }
+
    return false;
  }
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Mon, 7 Oct 2013 13:01:28 +0000 (13:01 +0000)
committer	William Schmidt <wschmidt@gcc.gnu.org>
	Mon, 7 Oct 2013 13:01:28 +0000 (13:01 +0000)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/rs6000/rs6000.c		patch \| blob \| blame \| history