;; Without these patterns, we'll try the unsigned SI conversion which
;; is complex for SSE, rather than the signed SI conversion, which isn't.
-(define_expand "fixuns_truncsfhi2"
+(define_expand "fixuns_trunc<mode>hi2"
[(set (match_dup 2)
- (fix:SI (match_operand:SF 1 "nonimmediate_operand" "")))
+ (fix:SI (match_operand:SSEMODEF 1 "nonimmediate_operand" "")))
(set (match_operand:HI 0 "nonimmediate_operand" "")
(subreg:HI (match_dup 2) 0))]
- "TARGET_SSE_MATH"
- "operands[2] = gen_reg_rtx (SImode);")
-
-(define_expand "fixuns_truncdfhi2"
- [(set (match_dup 2)
- (fix:SI (match_operand:DF 1 "nonimmediate_operand" "")))
- (set (match_operand:HI 0 "nonimmediate_operand" "")
- (subreg:HI (match_dup 2) 0))]
- "TARGET_SSE_MATH && TARGET_SSE2"
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
"operands[2] = gen_reg_rtx (SImode);")
;; When SSE is available, it is always faster to use it!
-(define_insn "fix_truncsfdi_sse"
+(define_insn "fix_trunc<mode>di_sse"
[(set (match_operand:DI 0 "register_operand" "=r,r")
- (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
- "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)"
- "cvttss2si{q}\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "mode" "SF")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "amdfam10_decode" "double,double")])
-
-(define_insn "fix_truncdfdi_sse"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (fix:DI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))]
- "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
- "cvttsd2si{q}\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "amdfam10_decode" "double,double")])
-
-(define_insn "fix_truncsfsi_sse"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
- "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)"
- "cvttss2si\t{%1, %0|%0, %1}"
+ (fix:DI (match_operand:SSEMODEF 1 "nonimmediate_operand" "x,xm")))]
+ "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
+ && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "cvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
- (set_attr "mode" "DF")
+ (set_attr "mode" "<MODE>")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")])
-(define_insn "fix_truncdfsi_sse"
+(define_insn "fix_trunc<mode>si_sse"
[(set (match_operand:SI 0 "register_operand" "=r,r")
- (fix:SI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))]
- "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
- "cvttsd2si\t{%1, %0|%0, %1}"
+ (fix:SI (match_operand:SSEMODEF 1 "nonimmediate_operand" "x,xm")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)
+ && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
- (set_attr "mode" "DF")
+ (set_attr "mode" "<MODE>")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")])
;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
(define_peephole2
- [(set (match_operand:DF 0 "register_operand" "")
- (match_operand:DF 1 "memory_operand" ""))
- (set (match_operand:SSEMODEI24 2 "register_operand" "")
- (fix:SSEMODEI24 (match_dup 0)))]
- "!TARGET_K8
- && peep2_reg_dead_p (2, operands[0])"
- [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]
- "")
-
-(define_peephole2
- [(set (match_operand:SF 0 "register_operand" "")
- (match_operand:SF 1 "memory_operand" ""))
+ [(set (match_operand:SSEMODEF 0 "register_operand" "")
+ (match_operand:SSEMODEF 1 "memory_operand" ""))
(set (match_operand:SSEMODEI24 2 "register_operand" "")
(fix:SSEMODEI24 (match_dup 0)))]
"!TARGET_K8
;; Avoid vector decoded forms of the instruction.
(define_peephole2
- [(match_scratch:DF 2 "Y")
+ [(match_scratch:DF 2 "Y2")
(set (match_operand:SSEMODEI24 0 "register_operand" "")
(fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"