From: Richard Henderson Date: Wed, 13 Apr 2005 04:59:33 +0000 (-0700) Subject: i386.c (ix86_prepare_sse_fp_compare_args): Split ... X-Git-Tag: misc/cutover-cvs2svn~4126 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ab8efbd82444a0246d024e7b1709820ba289b2a8;p=thirdparty%2Fgcc.git i386.c (ix86_prepare_sse_fp_compare_args): Split ... * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ... (ix86_expand_sse_fp_minmax): ... from ... (ix86_expand_fp_movcc): ... here. (ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc. * config/i386/i386-protos.h: Update. * config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New. (sse_setccsf, sse_setccdf): Allow before reload. (movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove. (movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove. (ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New. * config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New. (anddf3, nanddf3, iordf3, xordf3): New. From-SVN: r98068 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 86e3f9703bc4..523237ac94fd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2005-04-12 Richard Henderson + + * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ... + (ix86_expand_sse_fp_minmax): ... from ... + (ix86_expand_fp_movcc): ... here. + (ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc. + * config/i386/i386-protos.h: Update. + * config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New. + (sse_setccsf, sse_setccdf): Allow before reload. + (movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove. + (movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove. + (ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New. + * config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New. + (anddf3, nanddf3, iordf3, xordf3): New. + 2005-04-12 Jeff Law * Makefile.in (OBJS-common): Add tree-ssa-uncprop.o. 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index fa9c03af9261..41a70a8e3a29 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -150,7 +150,6 @@ extern void ix86_expand_branch (enum rtx_code, rtx); extern int ix86_expand_setcc (enum rtx_code, rtx); extern int ix86_expand_int_movcc (rtx[]); extern int ix86_expand_fp_movcc (rtx[]); -extern void ix86_split_sse_movcc (rtx[]); extern int ix86_expand_int_addcc (rtx[]); extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void x86_initialize_trampoline (rtx, rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b567069d92d6..e82cefc76eca 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10025,6 +10025,180 @@ ix86_expand_int_movcc (rtx operands[]) return 1; /* DONE */ } +/* Swap, force into registers, or otherwise massage the two operands + to an sse comparison with a mask result. Thus we differ a bit from + ix86_prepare_fp_compare_args which expects to produce a flags result. + + The DEST operand exists to help determine whether to commute commutative + operators. The POP0/POP1 operands are updated in place. The new + comparison code is returned, or UNKNOWN if not implementable. */ + +static enum rtx_code +ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, + rtx *pop0, rtx *pop1) +{ + rtx tmp; + + switch (code) + { + case LTGT: + case UNEQ: + /* We have no LTGT as an operator. We could implement it with + NE & ORDERED, but this requires an extra temporary. It's + not clear that it's worth it. */ + return UNKNOWN; + + case LT: + case LE: + case UNGT: + case UNGE: + /* These are supported directly. */ + break; + + case EQ: + case NE: + case UNORDERED: + case ORDERED: + /* For commutative operators, try to canonicalize the destination + operand to be first in the comparison - this helps reload to + avoid extra moves. 
*/ + if (!dest || !rtx_equal_p (dest, *pop1)) + break; + /* FALLTHRU */ + + case GE: + case GT: + case UNLE: + case UNLT: + /* These are not supported directly. Swap the comparison operands + to transform into something that is supported. */ + tmp = *pop0; + *pop0 = *pop1; + *pop1 = tmp; + code = swap_condition (code); + break; + + default: + gcc_unreachable (); + } + + return code; +} + +/* Detect conditional moves that exactly match min/max operational + semantics. Note that this is IEEE safe, as long as we don't + interchange the operands. + + Returns FALSE if this conditional move doesn't match a MIN/MAX, + and TRUE if the operation is successful and instructions are emitted. */ + +static bool +ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, + rtx cmp_op1, rtx if_true, rtx if_false) +{ + enum machine_mode mode; + bool is_min; + rtx tmp; + + if (code == LT) + ; + else if (code == UNGE) + { + tmp = if_true; + if_true = if_false; + if_false = tmp; + } + else + return false; + + if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) + is_min = true; + else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) + is_min = false; + else + return false; + + mode = GET_MODE (dest); + + /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, + but MODE may be a vector mode and thus not appropriate. */ + if (!flag_finite_math_only || !flag_unsafe_math_optimizations) + { + int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; + rtvec v; + + if_true = force_reg (mode, if_true); + v = gen_rtvec (2, if_true, if_false); + tmp = gen_rtx_UNSPEC (mode, v, u); + } + else + { + code = is_min ? 
SMIN : SMAX; + tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); + } + + emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); + return true; +} + +static void +ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, + rtx op_true, rtx op_false) +{ + enum machine_mode mode = GET_MODE (dest); + rtx t1, t2, t3, x; + + cmp_op0 = force_reg (mode, cmp_op0); + if (!nonimmediate_operand (cmp_op1, mode)) + cmp_op1 = force_reg (mode, cmp_op1); + + if (optimize + || reg_overlap_mentioned_p (dest, op_true) + || reg_overlap_mentioned_p (dest, op_false)) + t1 = gen_reg_rtx (mode); + else + t1 = dest; + + x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); + gcc_assert (sse_comparison_operator (x, VOIDmode)); + emit_insn (gen_rtx_SET (VOIDmode, t1, x)); + + if (op_false == CONST0_RTX (mode)) + { + op_true = force_reg (mode, op_true); + x = gen_rtx_AND (mode, t1, op_true); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } + else if (op_true == CONST0_RTX (mode)) + { + op_false = force_reg (mode, op_false); + x = gen_rtx_NOT (mode, t1); + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } + else + { + op_true = force_reg (mode, op_true); + op_false = force_reg (mode, op_false); + + t2 = gen_reg_rtx (mode); + if (optimize) + t3 = gen_reg_rtx (mode); + else + t3 = dest; + + x = gen_rtx_AND (mode, op_true, t1); + emit_insn (gen_rtx_SET (VOIDmode, t2, x)); + + x = gen_rtx_NOT (mode, t1); + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (VOIDmode, t3, x)); + + x = gen_rtx_IOR (mode, t3, t2); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } +} + int ix86_expand_fp_movcc (rtx operands[]) { @@ -10034,88 +10208,30 @@ ix86_expand_fp_movcc (rtx operands[]) if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) { - rtx cmp_op0, cmp_op1, if_true, if_false; - rtx clob; - enum machine_mode vmode, cmode; - bool is_minmax = false; - - cmp_op0 = ix86_compare_op0; - cmp_op1 = ix86_compare_op1; - if_true = operands[2]; - if_false = 
operands[3]; + enum machine_mode cmode; /* Since we've no cmove for sse registers, don't force bad register allocation just to gain access to it. Deny movcc when the comparison mode doesn't match the move mode. */ - cmode = GET_MODE (cmp_op0); + cmode = GET_MODE (ix86_compare_op0); if (cmode == VOIDmode) - cmode = GET_MODE (cmp_op1); + cmode = GET_MODE (ix86_compare_op1); if (cmode != mode) return 0; - /* We have no LTGT as an operator. We could implement it with - NE & ORDERED, but this requires an extra temporary. It's - not clear that it's worth it. */ - if (code == LTGT || code == UNEQ) + code = ix86_prepare_sse_fp_compare_args (operands[0], code, + &ix86_compare_op0, + &ix86_compare_op1); + if (code == UNKNOWN) return 0; - /* Massage condition to satisfy sse_comparison_operator. Try - to canonicalize the destination operand to be first in the - comparison - this helps reload to avoid extra moves. */ - if (!sse_comparison_operator (operands[1], VOIDmode) - || (COMMUTATIVE_P (operands[1]) - && rtx_equal_p (operands[0], cmp_op1))) - { - tmp = cmp_op0; - cmp_op0 = cmp_op1; - cmp_op1 = tmp; - code = swap_condition (code); - } - - /* Detect conditional moves that exactly match min/max operational - semantics. Note that this is IEEE safe, as long as we don't - interchange the operands. Which is why we keep this in the form - if an IF_THEN_ELSE instead of reducing to SMIN/SMAX. 
*/ - if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1)) - { - if (((cmp_op0 == if_true && cmp_op1 == if_false) - || (cmp_op0 == if_false && cmp_op1 == if_true))) - { - is_minmax = true; - if (code == UNGE) - { - code = LT; - tmp = if_true; - if_true = if_false; - if_false = tmp; - } - } - } - - if (mode == SFmode) - vmode = V4SFmode; - else if (mode == DFmode) - vmode = V2DFmode; - else - gcc_unreachable (); - - cmp_op0 = force_reg (mode, cmp_op0); - if (!nonimmediate_operand (cmp_op1, mode)) - cmp_op1 = force_reg (mode, cmp_op1); - - tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); - gcc_assert (sse_comparison_operator (tmp, VOIDmode)); - - tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - - if (!is_minmax) - { - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode)); - tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); - } + if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0, + ix86_compare_op1, operands[2], + operands[3])) + return 1; - emit_insn (tmp); + ix86_expand_sse_movcc (operands[0], code, ix86_compare_op0, + ix86_compare_op1, operands[2], operands[3]); return 1; } @@ -10166,100 +10282,6 @@ ix86_expand_fp_movcc (rtx operands[]) return 1; } -void -ix86_split_sse_movcc (rtx operands[]) -{ - rtx dest, scratch, cmp, op_true, op_false, x; - enum machine_mode mode, vmode; - - /* Note that the operator CMP has been set up with matching constraints - such that dest is valid for the comparison. Unless one of the true - or false operands are zero, the true operand has already been placed - in SCRATCH. */ - dest = operands[0]; - scratch = operands[1]; - op_true = operands[2]; - op_false = operands[3]; - cmp = operands[4]; - - mode = GET_MODE (dest); - vmode = GET_MODE (scratch); - - /* We need to make sure that the TRUE and FALSE operands are out of the - way of the destination. 
Marking the destination earlyclobber doesn't - work, since we want matching constraints for the actual comparison, so - at some point we always wind up having to do a copy ourselves here. - We very much prefer the TRUE value to be in SCRATCH. If it turns out - that FALSE overlaps DEST, then we invert the comparison so that we - still only have to do one move. */ - if (rtx_equal_p (op_false, dest)) - { - enum rtx_code code; - - if (rtx_equal_p (op_true, dest)) - { - /* ??? Really ought not happen. It means some optimizer managed - to prove the operands were identical, but failed to fold the - conditional move to a straight move. Do so here, because - otherwise we'll generate incorrect code. And since they're - both already in the destination register, nothing to do. */ - return; - } - - x = gen_rtx_REG (mode, REGNO (scratch)); - emit_move_insn (x, op_false); - op_false = op_true; - op_true = x; - - code = GET_CODE (cmp); - code = reverse_condition_maybe_unordered (code); - cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1)); - } - else if (op_true == CONST0_RTX (mode)) - ; - else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest)) - ; - else - { - x = gen_rtx_REG (mode, REGNO (scratch)); - emit_move_insn (x, op_true); - op_true = x; - } - - emit_insn (gen_rtx_SET (VOIDmode, dest, cmp)); - dest = simplify_gen_subreg (vmode, dest, mode, 0); - - if (op_false == CONST0_RTX (mode)) - { - op_true = simplify_gen_subreg (vmode, op_true, mode, 0); - x = gen_rtx_AND (vmode, dest, op_true); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } - else - { - op_false = simplify_gen_subreg (vmode, op_false, mode, 0); - - if (op_true == CONST0_RTX (mode)) - { - x = gen_rtx_NOT (vmode, dest); - x = gen_rtx_AND (vmode, x, op_false); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } - else - { - x = gen_rtx_AND (vmode, scratch, dest); - emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); - - x = gen_rtx_NOT (vmode, dest); - x = gen_rtx_AND (vmode, x, op_false); - 
emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - - x = gen_rtx_IOR (vmode, dest, scratch); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } - } -} - /* Expand conditional increment or decrement using adb/sbb instructions. The default case using setcc followed by the conditional move can be done by generic code. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 62812b2ea3ba..098e32665739 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -104,6 +104,8 @@ ; Generic math support (UNSPEC_COPYSIGN 50) + (UNSPEC_IEEE_MIN 51) ; not commutative + (UNSPEC_IEEE_MAX 52) ; not commutative ; x87 Floating point (UNSPEC_SIN 60) @@ -12462,17 +12464,14 @@ ;; The SSE store flag instructions saves 0 or 0xffffffff to the result. ;; subsequent logical operations are used to imitate conditional moves. ;; 0xffffffff is NaN, but not in normalized form, so we can't represent -;; it directly. Further holding this value in pseudo register might bring -;; problem in implicit normalization in spill code. -;; So we don't define FLOAT_STORE_FLAG_VALUE and create these -;; instructions after reload by splitting the conditional move patterns. +;; it directly. (define_insn "*sse_setccsf" [(set (match_operand:SF 0 "register_operand" "=x") (match_operator:SF 1 "sse_comparison_operator" [(match_operand:SF 2 "register_operand" "0") (match_operand:SF 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE && reload_completed" + "TARGET_SSE" "cmp%D1ss\t{%3, %0|%0, %3}" [(set_attr "type" "ssecmp") (set_attr "mode" "SF")]) @@ -12482,7 +12481,7 @@ (match_operator:DF 1 "sse_comparison_operator" [(match_operand:DF 2 "register_operand" "0") (match_operand:DF 3 "nonimmediate_operand" "Ym")]))] - "TARGET_SSE2 && reload_completed" + "TARGET_SSE2" "cmp%D1sd\t{%3, %0|%0, %3}" [(set_attr "type" "ssecmp") (set_attr "mode" "DF")]) @@ -17707,51 +17706,6 @@ "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH" "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") -;; These versions of min/max are aware of the instruction's behavior -;; wrt -0.0 and NaN inputs. If we don't care about either, then we -;; should have used the smin/smax expanders in the first place. -(define_insn "*movsfcc_1_sse_min" - [(set (match_operand:SF 0 "register_operand" "=x") - (if_then_else:SF - (lt:SF (match_operand:SF 1 "register_operand" "0") - (match_operand:SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE_MATH" - "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn "*movsfcc_1_sse_max" - [(set (match_operand:SF 0 "register_operand" "=x") - (if_then_else:SF - (lt:SF (match_operand:SF 2 "nonimmediate_operand" "xm") - (match_operand:SF 1 "nonimmediate_operand" "0")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE_MATH" - "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "SF")]) - -(define_insn_and_split "*movsfcc_1_sse" - [(set (match_operand:SF 0 "register_operand" "=x,x,x") - (if_then_else:SF - (match_operator:SF 4 "sse_comparison_operator" - [(match_operand:SF 5 "register_operand" "0,0,0") - (match_operand:SF 6 "nonimmediate_operand" "xm,xm,xm")]) - (match_operand:SF 2 "reg_or_0_operand" "C,x,x") - (match_operand:SF 3 "reg_or_0_operand" "x,C,x"))) - (clobber (match_scratch:V4SF 1 "=&x,&x,&x"))] - "TARGET_SSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] -{ - ix86_split_sse_movcc (operands); - DONE; -}) - (define_insn "*movsfcc_1_387" [(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f") (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" @@ -17776,51 +17730,6 @@ "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)" "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") -;; These versions of min/max are aware of the instruction's behavior -;; wrt -0.0 and NaN inputs. 
If we don't care about either, then we -;; should have used the smin/smax expanders in the first place. -(define_insn "*movdfcc_1_sse_min" - [(set (match_operand:DF 0 "register_operand" "=x") - (if_then_else:DF - (lt:DF (match_operand:DF 1 "register_operand" "0") - (match_operand:DF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "minsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn "*movdfcc_1_sse_max" - [(set (match_operand:DF 0 "register_operand" "=x") - (if_then_else:DF - (lt:DF (match_operand:DF 2 "nonimmediate_operand" "xm") - (match_operand:DF 1 "nonimmediate_operand" "0")) - (match_dup 1) - (match_dup 2)))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "maxsd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseadd") - (set_attr "mode" "DF")]) - -(define_insn_and_split "*movdfcc_1_sse" - [(set (match_operand:DF 0 "register_operand" "=x,x,x") - (if_then_else:DF - (match_operator:DF 4 "sse_comparison_operator" - [(match_operand:DF 5 "register_operand" "0,0,0") - (match_operand:DF 6 "nonimmediate_operand" "xm,xm,xm")]) - (match_operand:DF 2 "reg_or_0_operand" "C,x,x") - (match_operand:DF 3 "reg_or_0_operand" "x,C,x"))) - (clobber (match_scratch:V2DF 1 "=&x,&x,&x"))] - "TARGET_SSE2 && TARGET_SSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] -{ - ix86_split_sse_movcc (operands); - DONE; -}) - (define_insn "*movdfcc_1" [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" @@ -17935,6 +17844,52 @@ [(set_attr "type" "sseadd") (set_attr "mode" "DF")]) +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. 
+ +(define_insn "*ieee_sminsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "TARGET_SSE_MATH" + "minss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_insn "*ieee_smaxsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "TARGET_SSE_MATH" + "maxss\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) + +(define_insn "*ieee_smindf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (unspec:DF [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "*ieee_smaxdf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (unspec:DF [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "TARGET_SSE2 && TARGET_SSE_MATH" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + ;; Conditional addition patterns (define_expand "addqicc" [(match_operand:QI 0 "register_operand" "") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e147b71028b7..dc8f9e4dec00 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -773,6 +773,47 @@ [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) +;; Also define scalar versions. These are used for abs, neg, and +;; conditional move. Using subregs into vector modes causes register +;; allocation lossage. These patterns do not allow memory operands +;; because the native instructions read the full 128-bits. 
+ +(define_insn "*andsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (and:SF (match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "x")))] + "TARGET_SSE" + "andps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "*nandsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (and:SF (not:SF (match_operand:SF 1 "register_operand" "0")) + (match_operand:SF 2 "register_operand" "x")))] + "TARGET_SSE" + "andnps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "*iorsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (ior:SF (match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "x")))] + "TARGET_SSE" + "orps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_insn "*xorsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (xor:SF (match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "x")))] + "TARGET_SSE" + "xorps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel single-precision floating point conversion operations @@ -1624,7 +1665,7 @@ [(set (match_operand:V2DF 0 "register_operand" "=x") (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (AND, V4SFmode, operands)" + "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)" "andpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V2DF")]) @@ -1670,6 +1711,47 @@ [(set_attr "type" "sselog") (set_attr "mode" "V2DF")]) +;; Also define scalar versions. These are used for abs, neg, and +;; conditional move. Using subregs into vector modes causes register +;; allocation lossage. 
These patterns do not allow memory operands +;; because the native instructions read the full 128-bits. + +(define_insn "*anddf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (and:DF (match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "register_operand" "x")))] + "TARGET_SSE2" + "andpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "*nanddf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (and:DF (not:DF (match_operand:DF 1 "register_operand" "0")) + (match_operand:DF 2 "register_operand" "x")))] + "TARGET_SSE2" + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "*iordf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (ior:DF (match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "register_operand" "x")))] + "TARGET_SSE2" + "orpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_insn "*xordf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (xor:DF (match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "register_operand" "x")))] + "TARGET_SSE2" + "xorpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel double-precision floating point conversion operations