git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386-protos.h (ix86_expand_sse4_unpack): New.
author H.J. Lu <hongjiu.lu@intel.com>
Sat, 26 May 2007 14:34:21 +0000 (14:34 +0000)
committer H.J. Lu <hjl@gcc.gnu.org>
Sat, 26 May 2007 14:34:21 +0000 (07:34 -0700)
2007-05-26  H.J. Lu  <hongjiu.lu@intel.com>

* config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.

* config/i386/i386.c (ix86_expand_sse4_unpack): New.

* config/i386/sse.md (vec_unpacku_hi_v16qi): Call
ix86_expand_sse4_unpack if SSE4.1 is enabled.
(vec_unpacks_hi_v16qi): Likewise.
(vec_unpacku_lo_v16qi): Likewise.
(vec_unpacks_lo_v16qi): Likewise.
(vec_unpacku_hi_v8hi): Likewise.
(vec_unpacks_hi_v8hi): Likewise.
(vec_unpacku_lo_v8hi): Likewise.
(vec_unpacks_lo_v8hi): Likewise.
(vec_unpacku_hi_v4si): Likewise.
(vec_unpacks_hi_v4si): Likewise.
(vec_unpacku_lo_v4si): Likewise.
(vec_unpacks_lo_v4si): Likewise.

From-SVN: r125093

gcc/ChangeLog
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/sse.md

index 85ddc22a55e73b60b51d4e72d2d7e9f85171fecf..cdd0b84ffc9aaab6173dba2378d9bebaa7edbd4c 100644 (file)
@@ -1,3 +1,23 @@
+2007-05-26  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.
+
+       * config/i386/i386.c (ix86_expand_sse4_unpack): New.
+
+       * config/i386/sse.md (vec_unpacku_hi_v16qi): Call
+       ix86_expand_sse4_unpack if SSE4.1 is enabled.
+       (vec_unpacks_hi_v16qi): Likewise.
+       (vec_unpacku_lo_v16qi): Likewise.
+       (vec_unpacks_lo_v16qi): Likewise.
+       (vec_unpacku_hi_v8hi): Likewise.
+       (vec_unpacks_hi_v8hi): Likewise.
+       (vec_unpacku_lo_v8hi): Likewise.
+       (vec_unpacks_lo_v8hi): Likewise.
+       (vec_unpacku_hi_v4si): Likewise.
+       (vec_unpacks_hi_v4si): Likewise.
+       (vec_unpacku_lo_v4si): Likewise.
+       (vec_unpacks_lo_v4si): Likewise.
+
 2007-05-26  Kazu Hirata  <kazu@codesourcery.com>
 
        * c-typeck.c, config/arm/arm.c, config/darwin.c,
index 2f320391943feeebf79bb72c6e8ed5c5e5fd2c32..60b495582aa0f7c25bd2e1027bea9a77e10c8aca 100644 (file)
@@ -112,6 +112,7 @@ extern int ix86_expand_fp_movcc (rtx[]);
 extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
 extern void ix86_expand_sse_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
 extern int ix86_expand_int_addcc (rtx[]);
 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
index 7bc5fe04c8941272da2cecc96ec1d812a3244414..b0db9504e3d40387e160c7682a11071fb1bcd1e1 100644 (file)
@@ -12843,6 +12843,55 @@ ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
   emit_insn (unpack (dest, operands[1], se));
 }
 
+/* SSE4.1 variant of ix86_expand_sse_unpack: widen the low half (high half
+   if HIGH_P) of operands[1] into operands[0]; zero-extend if UNSIGNED_P.  */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+  enum machine_mode imode = GET_MODE (operands[1]);
+  rtx (*unpack)(rtx, rtx);
+  rtx src, dest;
+
+  switch (imode)
+    {
+    case V16QImode:
+      if (unsigned_p)
+       unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+      else
+       unpack = gen_sse4_1_extendv8qiv8hi2;
+      break;
+    case V8HImode:
+      if (unsigned_p)
+       unpack = gen_sse4_1_zero_extendv4hiv4si2;
+      else
+       unpack = gen_sse4_1_extendv4hiv4si2;
+      break;
+    case V4SImode:
+      if (unsigned_p)
+       unpack = gen_sse4_1_zero_extendv2siv2di2;
+      else
+       unpack = gen_sse4_1_extendv2siv2di2;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  dest = operands[0];
+  if (high_p)
+    {
+      /* Shift the TImode view right by 64 bits: high 8 bytes -> low 8.  */
+      src = gen_reg_rtx (imode);
+      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
+                                  gen_lowpart (TImode, operands[1]),
+                                  GEN_INT (64)));
+    }
+  else
+    src = operands[1];
+
+  emit_insn (unpack (dest, src));
+}
+
 /* Expand conditional increment or decrement using adb/sbb instructions.
    The default case using setcc followed by the conditional move can be
    done by generic code.  */
index 800807cde17c146fc66e3ed4661b0bead5363b96..81ff9258ef9f270b6b4bf65e5e4ef6f2951fea5f 100644 (file)
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })