]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
authorhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 15 May 2019 15:05:07 +0000 (15:05 +0000)
committerhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 15 May 2019 15:05:07 +0000 (15:05 +0000)
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/i386-expand.c (ix86_split_mmx_punpck): New function.
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/mmx.md (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@271216 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/i386-expand.c
gcc/config/i386/i386-protos.h
gcc/config/i386/mmx.md

index 01783a9b773539a4c780d561203ad4233be65084..3e5e2d2439c2b5917e9038b975557e931fedf550 100644 (file)
@@ -1,3 +1,17 @@
+2019-05-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/89021
+       * config/i386/i386-expand.c (ix86_split_mmx_punpck): New function.
+       * config/i386/i386-protos.h (ix86_split_mmx_punpck): New
+       prototype.
+       * config/i386/mmx.md (mmx_punpckhbw): Changed to
+       define_insn_and_split to support SSE emulation.
+       (mmx_punpcklbw): Likewise.
+       (mmx_punpckhwd): Likewise.
+       (mmx_punpcklwd): Likewise.
+       (mmx_punpckhdq): Likewise.
+       (mmx_punpckldq): Likewise.
+
 2019-05-15  H.J. Lu  <hongjiu.lu@intel.com>
            Uros Bizjak  <ubizjak@gmail.com>
 
index f1e05937f46dd1c5c3887cb971251922cc036654..81300f6f2c73a5cc4c1705c508235d36e4791bb0 100644 (file)
@@ -716,6 +716,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
   ix86_move_vector_high_sse_to_mmx (op0);
 }
 
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  machine_mode mode = GET_MODE (op0);
+  rtx mask;
+  /* The corresponding SSE mode.  */
+  machine_mode sse_mode, double_sse_mode;
+
+  switch (mode)
+    {
+    case E_V8QImode:
+      sse_mode = V16QImode;
+      double_sse_mode = V32QImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+                              gen_rtvec (16,
+                                         GEN_INT (0), GEN_INT (16),
+                                         GEN_INT (1), GEN_INT (17),
+                                         GEN_INT (2), GEN_INT (18),
+                                         GEN_INT (3), GEN_INT (19),
+                                         GEN_INT (4), GEN_INT (20),
+                                         GEN_INT (5), GEN_INT (21),
+                                         GEN_INT (6), GEN_INT (22),
+                                         GEN_INT (7), GEN_INT (23)));
+      break;
+
+    case E_V4HImode:
+      sse_mode = V8HImode;
+      double_sse_mode = V16HImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+                              gen_rtvec (8,
+                                         GEN_INT (0), GEN_INT (8),
+                                         GEN_INT (1), GEN_INT (9),
+                                         GEN_INT (2), GEN_INT (10),
+                                         GEN_INT (3), GEN_INT (11)));
+      break;
+
+    case E_V2SImode:
+      sse_mode = V4SImode;
+      double_sse_mode = V8SImode;
+      mask = gen_rtx_PARALLEL (VOIDmode,
+                              gen_rtvec (4,
+                                         GEN_INT (0), GEN_INT (4),
+                                         GEN_INT (1), GEN_INT (5)));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Generate SSE punpcklXX.  */
+  rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
+  op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
+  op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
+
+  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+  rtx insn = gen_rtx_SET (dest, op2);
+  emit_insn (insn);
+
+  if (high_p)
+    {
+      /* Move bits 64:127 to bits 0:63.  */
+      mask = gen_rtx_PARALLEL (VOIDmode,
+                              gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+                                         GEN_INT (0), GEN_INT (0)));
+      dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
+      op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+      insn = gen_rtx_SET (dest, op1);
+      emit_insn (insn);
+    }
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
    operand order.  Returns true if the operands should be swapped.  */
 
index 760f530bf503ab8655c69938f99f3ea4c7b5508c..b9de1e7983e5b9994b7a5477019688cc53dcb312 100644 (file)
@@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
 extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
index 28c9aa744ed3b5f02f49dd30b243de634fd16097..b3fc7f3e67b4cc9c30255f43c229aa34964fac61 100644 (file)
    (set_attr "type" "mmxshft,sselog,sselog")
    (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
        (vec_select:V8QI
          (vec_concat:V16QI
-           (match_operand:V8QI 1 "register_operand" "0")
-           (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+           (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+           (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
           (parallel [(const_int 4) (const_int 12)
                      (const_int 5) (const_int 13)
                      (const_int 6) (const_int 14)
                      (const_int 7) (const_int 15)])))]
-  "TARGET_MMX"
-  "punpckhbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhbw\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpcklbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
        (vec_select:V8QI
          (vec_concat:V16QI
-           (match_operand:V8QI 1 "register_operand" "0")
-           (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+           (match_operand:V8QI 1 "register_operand" "0,0,Yv")
+           (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
           (parallel [(const_int 0) (const_int 8)
                      (const_int 1) (const_int 9)
                      (const_int 2) (const_int 10)
                      (const_int 3) (const_int 11)])))]
-  "TARGET_MMX"
-  "punpcklbw\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpcklbw\t{%2, %0|%0, %k2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
        (vec_select:V4HI
          (vec_concat:V8HI
-           (match_operand:V4HI 1 "register_operand" "0")
-           (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+           (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+           (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
           (parallel [(const_int 2) (const_int 6)
                      (const_int 3) (const_int 7)])))]
-  "TARGET_MMX"
-  "punpckhwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhwd\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpcklwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpcklwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
        (vec_select:V4HI
          (vec_concat:V8HI
-           (match_operand:V4HI 1 "register_operand" "0")
-           (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+           (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+           (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
           (parallel [(const_int 0) (const_int 4)
                      (const_int 1) (const_int 5)])))]
-  "TARGET_MMX"
-  "punpcklwd\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpcklwd\t{%2, %0|%0, %k2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhdq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhdq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
        (vec_select:V2SI
          (vec_concat:V4SI
-           (match_operand:V2SI 1 "register_operand" "0")
-           (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+           (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+           (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
-  "TARGET_MMX"
-  "punpckhdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckhdq\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, true); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckldq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckldq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
        (vec_select:V2SI
          (vec_concat:V4SI
-           (match_operand:V2SI 1 "register_operand" "0")
-           (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+           (match_operand:V2SI 1 "register_operand" "0,0,Yv")
+           (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
-  "TARGET_MMX"
-  "punpckldq\t{%2, %0|%0, %k2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t{%2, %0|%0, %k2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+  "ix86_split_mmx_punpck (operands, false); DONE;"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pinsrw"
   [(set (match_operand:V4HI 0 "register_operand")