git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR target/54349 (_mm_cvtsi128_si64 unnecessary stores value at stack)
author: Uros Bizjak <ubizjak@gmail.com>
Mon, 29 Apr 2013 11:00:10 +0000 (13:00 +0200)
committer: Uros Bizjak <uros@gcc.gnu.org>
Mon, 29 Apr 2013 11:00:10 +0000 (13:00 +0200)
PR target/54349
* config/i386/i386.h (enum ix86_tune_indices)
<X86_TUNE_INTER_UNIT_MOVES_TO_VEC, X86_TUNE_INTER_UNIT_MOVES_FROM_VEC>:
New, split from X86_TUNE_INTER_UNIT_MOVES.
<X86_TUNE_INTER_UNIT_MOVES>: Remove.
(TARGET_INTER_UNIT_MOVES_TO_VEC): New define.
(TARGET_INTER_UNIT_MOVES_FROM_VEC): Ditto.
(TARGET_INTER_UNIT_MOVES): Remove.
* config/i386/i386.c (initial_ix86_tune_features): Update.
Disable X86_TUNE_INTER_UNIT_MOVES_FROM_VEC for m_ATHLON_K8 only.
(ix86_expand_convert_uns_didf_sse): Use
TARGET_INTER_UNIT_MOVES_TO_VEC instead of TARGET_INTER_UNIT_MOVES.
(ix86_expand_vector_init_one_nonzero): Ditto.
(ix86_expand_vector_init_interleave): Ditto.
(inline_secondary_memory_needed): Return true for moves from SSE class
registers for !TARGET_INTER_UNIT_MOVES_FROM_VEC targets and for moves
to SSE class registers for !TARGET_INTER_UNIT_MOVES_TO_VEC targets.
* config/i386/constraints.md (Yi, Ym): Depend on
TARGET_INTER_UNIT_MOVES_TO_VEC.
(Yj, Yn): New constraints.
* config/i386/i386.md (*movdi_internal): Change constraints of
operand 1 from Yi to Yj and from Ym to Yn.
(*movsi_internal): Ditto.
(*movdf_internal): Ditto.
(*movsf_internal): Ditto.
(*float<SWI48x:mode><X87MODEF:mode>2_1): Use
TARGET_INTER_UNIT_MOVES_TO_VEC instead of TARGET_INTER_UNIT_MOVES.
(*float<SWI48x:mode><X87MODEF:mode>2_1 splitters): Ditto.
(floatdi<X87MODEF:mode>2_i387_with_xmm): Ditto.
(floatdi<X87MODEF:mode>2_i387_with_xmm splitters): Ditto.
* config/i386/sse.md (movdi_to_sse): Ditto.
(sse2_stored): Change constraint of operand 1 from Yi to Yj.
Use TARGET_INTER_UNIT_MOVES_FROM_VEC instead of
TARGET_INTER_UNIT_MOVES.
(sse_storeq_rex64): Change constraint of operand 1 from Yi to Yj.
(sse_storeq_rex64 splitter): Use TARGET_INTER_UNIT_MOVES_FROM_VEC
instead of TARGET_INTER_UNIT_MOVES.
* config/i386/mmx.md (*mov<mode>_internal): Change constraint of
operand 1 from Yi to Yj and from Ym to Yn.

From-SVN: r198401

gcc/ChangeLog
gcc/config/i386/constraints.md
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/config/i386/mmx.md
gcc/config/i386/sse.md

index 5f0e495ef6608d843d04a254cfadc9c769f2020f..2ee4a0ce1686562648c1bdac4eb3167eff8191e9 100644 (file)
@@ -1,3 +1,45 @@
+2013-04-29  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/54349
+       * config/i386/i386.h (enum ix86_tune_indices)
+       <X86_TUNE_INTER_UNIT_MOVES_TO_VEC, X86_TUNE_INTER_UNIT_MOVES_FROM_VEC>:
+       New, split from X86_TUNE_INTER_UNIT_MOVES.
+       <X86_TUNE_INTER_UNIT_MOVES>: Remove.
+       (TARGET_INTER_UNIT_MOVES_TO_VEC): New define.
+       (TARGET_INTER_UNIT_MOVES_FROM_VEC): Ditto.
+       (TARGET_INTER_UNIT_MOVES): Remove.
+       * config/i386/i386.c (initial_ix86_tune_features): Update.
+       Disable X86_TUNE_INTER_UNIT_MOVES_FROM_VEC for m_ATHLON_K8 only.
+       (ix86_expand_convert_uns_didf_sse): Use
+       TARGET_INTER_UNIT_MOVES_TO_VEC instead of TARGET_INTER_UNIT_MOVES.
+       (ix86_expand_vector_init_one_nonzero): Ditto.
+       (ix86_expand_vector_init_interleave): Ditto.
+       (inline_secondary_memory_needed): Return true for moves from SSE class
+       registers for !TARGET_INTER_UNIT_MOVES_FROM_VEC targets and for moves
+       to SSE class registers for !TARGET_INTER_UNIT_MOVES_TO_VEC targets.
+       * config/i386/constraints.md (Yi, Ym): Depend on
+       TARGET_INTER_UNIT_MOVES_TO_VEC.
+       (Yj, Yn): New constraints.
+       * config/i386/i386.md (*movdi_internal): Change constraints of
+       operand 1 from Yi to Yj and from Ym to Yn.
+       (*movsi_internal): Ditto.
+       (*movdf_internal): Ditto.
+       (*movsf_internal): Ditto.
+       (*float<SWI48x:mode><X87MODEF:mode>2_1): Use
+       TARGET_INTER_UNIT_MOVES_TO_VEC instead of TARGET_INTER_UNIT_MOVES.
+       (*float<SWI48x:mode><X87MODEF:mode>2_1 splitters): Ditto.
+       (floatdi<X87MODEF:mode>2_i387_with_xmm): Ditto.
+       (floatdi<X87MODEF:mode>2_i387_with_xmm splitters): Ditto.
+       * config/i386/sse.md (movdi_to_sse): Ditto.
+       (sse2_stored): Change constraint of operand 1 from Yi to Yj.
+       Use TARGET_INTER_UNIT_MOVES_FROM_VEC instead of
+       TARGET_INTER_UNIT_MOVES.
+       (sse_storeq_rex64): Change constraint of operand 1 from Yi to Yj.
+       (sse_storeq_rex64 splitter): Use TARGET_INTER_UNIT_MOVES_FROM_VEC
+       instead of TARGET_INTER_UNIT_MOVES.
+       * config/i386/mmx.md (*mov<mode>_internal): Change constraint of
+       operand 1 from Yi to Yj and from Ym to Yn.
+
 2013-04-29  James Greenhalgh  <james.greenhalgh@arm.com>
 
        * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New.
index ec7c856c1acb566561ad36cab80c8f3574ce6110..6cb53b8aafbc447066ecd6dea37f1ed8b1657b1f 100644 (file)
 
 ;; We use the Y prefix to denote any number of conditional register sets:
 ;;  z  First SSE register.
-;;  i  SSE2 inter-unit moves enabled
-;;  m  MMX inter-unit moves enabled
+;;  i  SSE2 inter-unit moves to SSE register enabled
+;;  j  SSE2 inter-unit moves from SSE register enabled
+;;  m  MMX inter-unit moves to MMX register enabled
+;;  n  MMX inter-unit moves from MMX register enabled
 ;;  a  Integer register when zero extensions with AND are disabled
 ;;  p  Integer register when TARGET_PARTIAL_REG_STALL is disabled
 ;;  d  Integer register when integer DFmode moves are enabled
  "First SSE register (@code{%xmm0}).")
 
 (define_register_constraint "Yi"
- "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES ? SSE_REGS : NO_REGS"
- "@internal Any SSE register, when SSE2 and inter-unit moves are enabled.")
+ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register, when SSE2 and inter-unit moves to vector registers are enabled.")
+
+(define_register_constraint "Yj"
+ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register, when SSE2 and inter-unit moves from vector registers are enabled.")
 
 (define_register_constraint "Ym"
- "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS"
- "@internal Any MMX register, when inter-unit moves are enabled.")
+ "TARGET_MMX && TARGET_INTER_UNIT_MOVES_TO_VEC ? MMX_REGS : NO_REGS"
+ "@internal Any MMX register, when inter-unit moves to vector registers are enabled.")
+
+(define_register_constraint "Yn"
+ "TARGET_MMX && TARGET_INTER_UNIT_MOVES_FROM_VEC ? MMX_REGS : NO_REGS"
+ "@internal Any MMX register, when inter-unit moves from vector registers are enabled.")
 
 (define_register_constraint "Yp"
  "TARGET_PARTIAL_REG_STALL ? NO_REGS : GENERAL_REGS"
index 06aeecfaa7ce4383fb3ea28285f1df0a4e22a445..7a58a76f08b08b55cc01b7c9444e517a85546eee 100644 (file)
@@ -1931,9 +1931,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_USE_FFREEP */
   m_AMD_MULTIPLE,
 
-  /* X86_TUNE_INTER_UNIT_MOVES */
+  /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC */
   ~(m_AMD_MULTIPLE | m_GENERIC),
 
+  /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC */
+  ~m_ATHLON_K8,
+
   /* X86_TUNE_INTER_UNIT_CONVERSIONS */
   ~(m_AMDFAM10 | m_BDVER ),
 
@@ -17867,7 +17870,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
   rtx x;
 
   int_xmm = gen_reg_rtx (V4SImode);
-  if (TARGET_INTER_UNIT_MOVES)
+  if (TARGET_INTER_UNIT_MOVES_TO_VEC)
     emit_insn (gen_movdi_to_sse (int_xmm, input));
   else if (TARGET_SSE_SPLIT_REGS)
     {
@@ -33668,7 +33671,8 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
 
       /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
-      if (!TARGET_INTER_UNIT_MOVES)
+      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
+         || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
        return true;
 
       /* Between SSE and general, we have moves no larger than word size.  */
@@ -35891,9 +35895,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
       /* For SSE4.1, we normally use vector set.  But if the second
         element is zero and inter-unit moves are OK, we use movq
         instead.  */
-      use_vector_set = (TARGET_64BIT
-                       && TARGET_SSE4_1
-                       && !(TARGET_INTER_UNIT_MOVES
+      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
+                       && !(TARGET_INTER_UNIT_MOVES_TO_VEC
                             && one_var == 0));
       break;
     case V16QImode:
@@ -36428,7 +36431,7 @@ half:
 
       /* Don't use ix86_expand_vector_init_interleave if we can't
         move from GPR to SSE register directly.  */
-      if (!TARGET_INTER_UNIT_MOVES)
+      if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
        break;
 
       n = GET_MODE_NUNITS (mode);
index d0f5f6f72ab5f6bd10c6851a0743817a48dbd3c8..6601567676d800fa0d61266fda63e7eaff564d0c 100644 (file)
@@ -304,7 +304,8 @@ enum ix86_tune_indices {
   X86_TUNE_EPILOGUE_USING_MOVE,
   X86_TUNE_SHIFT1,
   X86_TUNE_USE_FFREEP,
-  X86_TUNE_INTER_UNIT_MOVES,
+  X86_TUNE_INTER_UNIT_MOVES_TO_VEC,
+  X86_TUNE_INTER_UNIT_MOVES_FROM_VEC,
   X86_TUNE_INTER_UNIT_CONVERSIONS,
   X86_TUNE_FOUR_JUMP_LIMIT,
   X86_TUNE_SCHEDULE,
@@ -395,8 +396,11 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_EPILOGUE_USING_MOVE]
 #define TARGET_SHIFT1          ix86_tune_features[X86_TUNE_SHIFT1]
 #define TARGET_USE_FFREEP      ix86_tune_features[X86_TUNE_USE_FFREEP]
-#define TARGET_INTER_UNIT_MOVES        ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
-#define TARGET_INTER_UNIT_CONVERSIONS\
+#define TARGET_INTER_UNIT_MOVES_TO_VEC \
+       ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_TO_VEC]
+#define TARGET_INTER_UNIT_MOVES_FROM_VEC \
+       ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_FROM_VEC]
+#define TARGET_INTER_UNIT_CONVERSIONS \
        ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
 #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
 #define TARGET_SCHEDULE                ix86_tune_features[X86_TUNE_SCHEDULE]
index 759c867c29058e176a2097ae71ffd5fce3ee9d17..f6ffc019ce28de09566f84fa496ebcc5f131670e 100644 (file)
   [(set (match_operand:DI 0 "nonimmediate_operand"
     "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*x,*x,*x,m ,?r ,?*Yi,?*Ym,?*Yi")
        (match_operand:DI 1 "general_operand"
-    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Ym,r   ,C ,*x,m ,*x,*Yi,r   ,*Yi ,*Ym"))]
+    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Yn,r   ,C ,*x,m ,*x,*Yj,r   ,*Yj ,*Yn"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
   [(set (match_operand:SI 0 "nonimmediate_operand"
                        "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?*Yi")
        (match_operand:SI 1 "general_operand"
-                       "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yi,r"))]
+                       "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yj,r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
   [(set (match_operand:DF 0 "nonimmediate_operand"
     "=Yf*f,m   ,Yf*f,?Yd*r ,!o   ,?r,?m,?r,?r,x,x,x,m,*x,*x,*x,m ,r ,Yi")
        (match_operand:DF 1 "general_operand"
-    "Yf*fm,Yf*f,G   ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yi,r"))]
+    "Yf*fm,Yf*f,G   ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yj,r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
   [(set (match_operand:SF 0 "nonimmediate_operand"
          "=Yf*f,m   ,Yf*f,?r ,?m,x,x,x,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym")
        (match_operand:SF 1 "general_operand"
-         "Yf*fm,Yf*f,G   ,rmF,rF,C,x,m,x,Yi,r  ,*y ,m  ,*y,*Ym,r"))]
+         "Yf*fm,Yf*f,G   ,rmF,rF,C,x,m,x,Yj,r  ,*y ,m  ,*y,*Yn,r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
   /* Avoid store forwarding (partial memory) stall penalty
      by passing DImode value through XMM registers.  */
   if (<SWI48x:MODE>mode == DImode && !TARGET_64BIT
-      && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+      && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
       && optimize_function_for_speed_p (cfun))
     {
       emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
   if (GET_CODE (op1) == SUBREG)
     op1 = SUBREG_REG (op1);
 
-  if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
+  if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES_TO_VEC)
     {
       operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
       emit_insn (gen_sse2_loadld (operands[4],
   if (GENERAL_REG_P (op1))
     {
       operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
-      if (TARGET_INTER_UNIT_MOVES)
+      if (TARGET_INTER_UNIT_MOVES_TO_VEC)
        emit_insn (gen_sse2_loadld (operands[4],
                                    CONST0_RTX (V4SImode), operands[1]));
       else
    (clobber (match_scratch:V4SI 4 "=X,x"))
    (clobber (match_operand:DI 2 "memory_operand" "=X,m"))]
   "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
-   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
    && !TARGET_64BIT && optimize_function_for_speed_p (cfun)"
   "#"
   [(set_attr "type" "multi")
    (clobber (match_scratch:V4SI 4))
    (clobber (match_operand:DI 2 "memory_operand"))]
   "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
-   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
    && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
    && reload_completed"
   [(set (match_dup 2) (match_dup 3))
    (clobber (match_scratch:V4SI 4))
    (clobber (match_operand:DI 2 "memory_operand"))]
   "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
-   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
    && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
    && reload_completed"
   [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
 
 ;; Avoid store forwarding (partial memory) stall penalty by extending
 ;; SImode value to DImode through XMM register instead of pushing two
-;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES
+;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC
 ;; targets benefit from this optimization. Also note that fild
 ;; loads from memory only.
 
index fb75d49570a6c857eea196a64a503bd5f3186227..4911cb296e2f45674b8f6d9cbd2df3b7d4198544 100644 (file)
@@ -80,7 +80,7 @@
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand"
     "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r   ,?!Ym,x,x,x,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi")
        (match_operand:MMXMODE 1 "vector_move_operand"
-    "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!Ym,r   ,C,x,m,x,C ,*x,m ,*x,Yi,r ,*Yi,!Ym"))]
+    "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!Yn,r   ,C,x,m,x,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))]
   "TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
index f630303ecab28da88fdcd016a144acef315b22b5..354d4c909a70638dc1e8c976d00cf6220a5be5db 100644 (file)
     [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
      (clobber (match_scratch:V4SI 2 "=&x,X"))])]
-  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 (define_insn_and_split "sse2_stored"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
        (vec_select:SI
-         (match_operand:V4SI 1 "register_operand" "x,Yi")
+         (match_operand:V4SI 1 "register_operand" "x,Yj")
          (parallel [(const_int 0)])))]
   "TARGET_SSE"
   "#"
   "&& reload_completed
-   && (TARGET_INTER_UNIT_MOVES
+   && (TARGET_INTER_UNIT_MOVES_FROM_VEC
        || MEM_P (operands [0])
        || !GENERAL_REGNO_P (true_regnum (operands [0])))"
   [(set (match_dup 0) (match_dup 1))]
 (define_insn "*sse2_storeq_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
        (vec_select:DI
-         (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
+         (match_operand:V2DI 1 "nonimmediate_operand" "x,Yj,o")
          (parallel [(const_int 0)])))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
          (parallel [(const_int 0)])))]
   "TARGET_SSE
    && reload_completed
-   && (TARGET_INTER_UNIT_MOVES
+   && (TARGET_INTER_UNIT_MOVES_FROM_VEC
        || MEM_P (operands [0])
        || !GENERAL_REGNO_P (true_regnum (operands [0])))"
   [(set (match_dup 0) (match_dup 1))]