+2012-05-09 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/44141
+ * config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle
+ 128-bit vectors specially for TARGET_AVX. Emit sse2_movupd and
+ sse_movups RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+ or when optimizing for size.
+ * config/i386/sse.md (*mov<mode>_internal): Remove
+ TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code.
+ Calculate "mode" attribute according to optimize_function_for_size_p
+ and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag.
+ (*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template
+ depending on the mode of the instruction. Calculate "mode" attribute
+ according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES
+ and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags.
+ (*<sse2>_movdqu<avxsizesuffix>): Ditto.
+
2012-05-09 Georg-Johann Lay <avr@gjlay.de>
PR target/53256
PR target/51244
* config/sh/sh.md (*branch_true, *branch_false): New insns.
-2012-05-08 Teresa Johnson <tejohnson@google.com>
+2012-05-08 Teresa Johnson <tejohnson@google.com>
* gcov-io.h (__gcov_reset, __gcov_dump): Declare.
* doc/gcov.texi: Add note on using __gcov_reset and __gcov_dump.
(clone_function_name): Likewise.
(cgraph_create_virtual_clone): Likewise.
(cgraph_remove_node_and_inline_clones): Likewise.
- (cgraph_redirect_edge_call_stmt_to_callee): Move here from
- cgraphunit.c
+ (cgraph_redirect_edge_call_stmt_to_callee): Move here from cgraphunit.c
* cgraph.h: Reorder declarations so they match file of origin.
(cgraph_create_empty_node): Declare.
* cgraphunit.c (update_call_expr): Move to cgraphclones.c
Enable -Wunused-local-typedefs when -Wall or -Wunused is on
* opts.c (finish_options): Activate -Wunused-local-typedefs if
- -Wunused is activated.
+ -Wunused is activated.
* doc/invoke.texi: Update blurb of -Wunused-local-typedefs.
2012-05-04 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
* config/pa/pa.c (pa_legitimate_constant_p): Don't put function labels
in constant pool.
-2012-04-27 Ollie Wild <aaw@google.com>
+2012-04-27 Ollie Wild <aaw@google.com>
* doc/invoke.texi (Wliteral-suffix): Document new option.
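For reference, a minimal user-level trigger for the i386.c path below (a hypothetical testcase, not part of the patch): a vector move whose memory operands have known alignment smaller than the vector size is expanded through ix86_expand_vector_move_misalign, which after this change prefers a single unaligned move whenever TARGET_AVX holds, one of the unaligned-optimal tunings applies, or we optimize for size.

    typedef double v2df __attribute__ ((vector_size (16), aligned (8)));

    void
    copy_v2df (v2df *dst, v2df *src)
    {
      /* Only 8-byte alignment is known, so the expander must use
         unaligned vector moves (movupd/vmovupd or a split sequence).  */
      *dst = *src;
    }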
op0 = operands[0];
op1 = operands[1];
- if (TARGET_AVX)
+ if (TARGET_AVX
+ && GET_MODE_SIZE (mode) == 32)
{
switch (GET_MODE_CLASS (mode))
{
case MODE_VECTOR_INT:
case MODE_INT:
- switch (GET_MODE_SIZE (mode))
- {
- case 16:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
- }
- else
- {
- op0 = gen_lowpart (V16QImode, op0);
- op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
- }
- break;
- case 32:
- op0 = gen_lowpart (V32QImode, op0);
- op1 = gen_lowpart (V32QImode, op1);
- ix86_avx256_split_vector_move_misalign (op0, op1);
- break;
- default:
- gcc_unreachable ();
- }
- break;
+ op0 = gen_lowpart (V32QImode, op0);
+ op1 = gen_lowpart (V32QImode, op1);
+ /* FALLTHRU */
+
case MODE_VECTOR_FLOAT:
- switch (mode)
- {
- case V4SFmode:
- emit_insn (gen_sse_movups (op0, op1));
- break;
- case V2DFmode:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
- }
- else
- emit_insn (gen_sse2_movupd (op0, op1));
- break;
- case V8SFmode:
- case V4DFmode:
- ix86_avx256_split_vector_move_misalign (op0, op1);
- break;
- default:
- gcc_unreachable ();
- }
+ ix86_avx256_split_vector_move_misalign (op0, op1);
break;
default:
gcc_unreachable ();
}

return;
}
if (MEM_P (op1))
{
- /* If we're optimizing for size, movups is the smallest. */
- if (optimize_insn_for_size_p ()
- || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
- return;
- }
-
/* ??? If we have typed data, then it would appear that using
movdqu is the only way to get unaligned data loaded with
integer type. */
if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
+ /* We will eventually emit movups based on insn attributes. */
emit_insn (gen_sse2_movdqu (op0, op1));
- return;
}
-
- if (TARGET_SSE2 && mode == V2DFmode)
+ else if (TARGET_SSE2 && mode == V2DFmode)
{
rtx zero;
- if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+ if (TARGET_AVX
+ || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+ || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+ || optimize_function_for_size_p (cfun))
{
+ /* We will eventually emit movups based on insn attributes. */
emit_insn (gen_sse2_movupd (op0, op1));
return;
}
}
else
{
- if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+ if (TARGET_AVX
+ || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+ || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+ || optimize_function_for_size_p (cfun))
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
emit_insn (gen_sse_movups (op0, op1));
return;
}

if (mode != V4SFmode)
op0 = gen_lowpart (V4SFmode, op0);
+
m = adjust_address (op1, V2SFmode, 0);
emit_insn (gen_sse_loadlps (op0, op0, m));
m = adjust_address (op1, V2SFmode, 8);
emit_insn (gen_sse_loadhps (op0, op0, m));
}
else if (MEM_P (op0))
{
- /* If we're optimizing for size, movups is the smallest. */
- if (optimize_insn_for_size_p ()
- || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
- return;
- }
-
- /* ??? Similar to above, only less clear
- because of typeless stores. */
- if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
- && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
+ /* We will eventually emit movups based on insn attributes. */
emit_insn (gen_sse2_movdqu (op0, op1));
- return;
}
-
- if (TARGET_SSE2 && mode == V2DFmode)
+ else if (TARGET_SSE2 && mode == V2DFmode)
{
- if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+ if (TARGET_AVX
+ || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+ || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+ || optimize_function_for_size_p (cfun))
+ /* We will eventually emit movups based on insn attributes. */
emit_insn (gen_sse2_movupd (op0, op1));
else
{
if (mode != V4SFmode)
op1 = gen_lowpart (V4SFmode, op1);
- if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+ if (TARGET_AVX
+ || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+ || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+ || optimize_function_for_size_p (cfun))
{
op0 = gen_lowpart (V4SFmode, op0);
emit_insn (gen_sse_movups (op0, op1));
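The i386.c hunks above repeat one predicate at every load and store site. A condensed sketch of that predicate (a hypothetical helper for illustration only; the patch writes the condition out inline at each site):

    /* True when a single unaligned load/store beats a split lo/hi
       sequence: the ISA makes it cheap (AVX), the tuning marks
       unaligned accesses as optimal, or we optimize for size.  */
    static bool
    use_single_unaligned_move_p (bool is_load)
    {
      return (TARGET_AVX
              || (is_load ? TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
                          : TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_function_for_size_p (cfun));
    }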
&& (misaligned_operand (operands[0], <MODE>mode)
|| misaligned_operand (operands[1], <MODE>mode)))
return "vmovupd\t{%1, %0|%0, %1}";
- else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vmovaps\t{%1, %0|%0, %1}";
else
return "%vmovapd\t{%1, %0|%0, %1}";
&& (misaligned_operand (operands[0], <MODE>mode)
|| misaligned_operand (operands[1], <MODE>mode)))
return "vmovdqu\t{%1, %0|%0, %1}";
- else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vmovaps\t{%1, %0|%0, %1}";
else
return "%vmovdqa\t{%1, %0|%0, %1}";
[(set_attr "type" "sselog1,ssemov,ssemov")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
- (cond [(match_test "TARGET_AVX")
+ (cond [(and (eq_attr "alternative" "1,2")
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
+ (if_then_else
+ (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+ (const_string "V8SF")
+ (const_string "V4SF"))
+ (match_test "TARGET_AVX")
(const_string "<sseinsnmode>")
- (ior (ior (match_test "optimize_function_for_size_p (cfun)")
- (not (match_test "TARGET_SSE2")))
+ (ior (and (eq_attr "alternative" "1,2")
+ (match_test "optimize_function_for_size_p (cfun)"))
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES")))
(const_string "V4SF")
- (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
- (const_string "V4SF")
- (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
- (const_string "V2DF")
]
- (const_string "TI")))])
+ (const_string "<sseinsnmode>")))])
(define_insn "sse2_movq128"
[(set (match_operand:V2DI 0 "register_operand" "=x")
[(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ case MODE_V4SF:
+ return "%vmovups\t{%1, %0|%0, %1}";
+ default:
+ return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+ }
+}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "maybe_vex")
- (set_attr "mode" "<MODE>")])
+ (set (attr "mode")
+ (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (if_then_else
+ (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+ (const_string "V8SF")
+ (const_string "V4SF"))
+ (match_test "TARGET_AVX")
+ (const_string "<MODE>")
+ (ior (match_test "optimize_function_for_size_p (cfun)")
+ (and (eq_attr "alternative" "1")
+ (match_test "TARGET_SSE_TYPELESS_STORES")))
+ (const_string "V4SF")
+ ]
+ (const_string "<MODE>")))])
(define_insn "<sse2>_movdqu<avxsizesuffix>"
[(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "%vmovdqu\t{%1, %0|%0, %1}"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ case MODE_V4SF:
+ return "%vmovups\t{%1, %0|%0, %1}";
+ default:
+ return "%vmovdqu\t{%1, %0|%0, %1}";
+ }
+}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set (attr "prefix_data16")
(const_string "*")
(const_string "1")))
(set_attr "prefix" "maybe_vex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set (attr "mode")
+ (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (if_then_else
+ (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+ (const_string "V8SF")
+ (const_string "V4SF"))
+ (match_test "TARGET_AVX")
+ (const_string "<sseinsnmode>")
+ (ior (match_test "optimize_function_for_size_p (cfun)")
+ (and (eq_attr "alternative" "1")
+ (match_test "TARGET_SSE_TYPELESS_STORES")))
+ (const_string "V4SF")
+ ]
+ (const_string "<sseinsnmode>")))])
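The net effect of the sse.md changes: the expander always emits the typed unaligned pattern (sse2_movdqu, sse2_movupd), and the choice between the typed mnemonic and movups is deferred to the insn "mode" attribute at output time. A hypothetical illustration (not a testcase from the patch): on a tuning where TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL holds, the unaligned integer copy below is expanded as sse2_movdqu but printed as movups, because get_attr_mode resolves to MODE_V4SF.

    typedef int v4si __attribute__ ((vector_size (16), aligned (4)));

    void
    copy_v4si (v4si *dst, v4si *src)
    {
      /* Expanded as an unaligned integer vector move (sse2_movdqu);
         whether the output reads movdqu or movups is now decided by
         the insn's "mode" attribute, not at expand time.  */
      *dst = *src;
    }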
(define_insn "<sse3>_lddqu<avxsizesuffix>"
[(set (match_operand:VI1 0 "register_operand" "=x")