2011-11-04 Jakub Jelinek <jakub@redhat.com>
+ * config/i386/i386.c (ix86_expand_adjust_ufix_to_sfix_si): Add
+ XORP argument. Subtract 0x1p31 instead of 0x1p32. Use normal
+ signalling comparison instead of non-signalling. Store into
+ *XORP a pseudo holding 0x80000000 in elements where 0x1p31 has been
+ subtracted and 0 otherwise.
+ * config/i386/i386-protos.h (ix86_expand_adjust_ufix_to_sfix_si):
+ Adjust prototype.
+ * config/i386/sse.md (fixuns_trunc<mode><sseintvecmodelower>2): Enable
+ already for TARGET_SSE2. Xor in vector initialized by
+ ix86_expand_adjust_ufix_to_sfix_si at the end.
+ (vec_pack_ufix_trunc_<mode>): Likewise.
+
* tree-vect-stmts.c (vectorizable_conversion): Rewritten to handle
not just FLOAT_EXPR and FIX_TRUNC_EXPR, but also CONVERT_EXPR_CODE_P,
WIDEN_MULT_EXPR and WIDEN_LSHIFT_EXPR to handle what
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
- This is done by subtracting 0x1p32 from VAL if VAL is greater or equal
- (non-signalling) than 0x1p31. */
+ This is done by doing just signed conversion if VAL < 0x1p31, and otherwise
+ by subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
rtx
-ix86_expand_adjust_ufix_to_sfix_si (rtx val)
+ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
- REAL_VALUE_TYPE MTWO32r, TWO31r;
- rtx two31r, mtwo32r, tmp[3];
+ REAL_VALUE_TYPE TWO31r;
+ rtx two31r, tmp[4];
enum machine_mode mode = GET_MODE (val);
enum machine_mode scalarmode = GET_MODE_INNER (mode);
+ enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
rtx (*cmp) (rtx, rtx, rtx, rtx);
int i;
two31r = const_double_from_real_value (TWO31r, scalarmode);
two31r = ix86_build_const_vector (mode, 1, two31r);
two31r = force_reg (mode, two31r);
- real_ldexp (&MTWO32r, &dconstm1, 32);
- mtwo32r = const_double_from_real_value (MTWO32r, scalarmode);
- mtwo32r = ix86_build_const_vector (mode, 1, mtwo32r);
- mtwo32r = force_reg (mode, mtwo32r);
switch (mode)
{
- case V8SFmode: cmp = gen_avx_cmpv8sf3; break;
- case V4SFmode: cmp = gen_avx_cmpv4sf3; break;
- case V4DFmode: cmp = gen_avx_cmpv4df3; break;
- case V2DFmode: cmp = gen_avx_cmpv2df3; break;
+ case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
+ case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
+ case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
+ case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
default: gcc_unreachable ();
}
- emit_insn (cmp (tmp[0], val, two31r, GEN_INT (29)));
- tmp[1] = expand_simple_binop (mode, AND, tmp[0], mtwo32r, tmp[1],
+ tmp[3] = gen_rtx_LE (mode, two31r, val);
+ emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
+ tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
0, OPTAB_DIRECT);
- return expand_simple_binop (mode, PLUS, val, tmp[1], tmp[2],
+ if (intmode == V4SImode || TARGET_AVX2)
+ *xorp = expand_simple_binop (intmode, ASHIFT,
+ gen_lowpart (intmode, tmp[0]),
+ GEN_INT (31), NULL_RTX, 0,
+ OPTAB_DIRECT);
+ else
+ {
+ rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
+ two31 = ix86_build_const_vector (intmode, 1, two31);
+ *xorp = expand_simple_binop (intmode, AND,
+ gen_lowpart (intmode, tmp[0]),
+ two31, NULL_RTX, 0,
+ OPTAB_DIRECT);
+ }
+ return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
0, OPTAB_DIRECT);
}
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
[(match_operand:<sseintvecmode> 0 "register_operand" "")
(match_operand:VF1 1 "register_operand" "")]
- "TARGET_AVX"
+ "TARGET_SSE2"
{
- rtx tmp = ix86_expand_adjust_ufix_to_sfix_si (operands[1]);
- emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
+ rtx tmp[3];
+ tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
+ tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
+ emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
+ emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
DONE;
})
[(match_operand:<ssepackfltmode> 0 "register_operand" "")
(match_operand:VF2 1 "register_operand" "")
(match_operand:VF2 2 "register_operand" "")]
- "TARGET_AVX"
+ "TARGET_SSE2"
{
- rtx tmp[2];
- tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1]);
- tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2]);
- emit_insn (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp[0], tmp[1]));
+ rtx tmp[7];
+ tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
+ tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
+ tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
+ emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
+ if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
+ {
+ tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
+ ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
+ }
+ else
+ {
+ tmp[5] = gen_reg_rtx (V8SFmode);
+ ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
+ gen_lowpart (V8SFmode, tmp[3]), 0);
+ tmp[5] = gen_lowpart (V8SImode, tmp[5]);
+ }
+ tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
+ operands[0], 0, OPTAB_DIRECT);
+ if (tmp[6] != operands[0])
+ emit_move_insn (operands[0], tmp[6]);
DONE;
})