From: Uros Bizjak Date: Wed, 9 May 2012 18:06:47 +0000 (+0200) Subject: re PR target/44141 (Redundant loads and stores generated for AMD bdver1 target) X-Git-Tag: misc/gupc_5_2_0_release~354^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=20f9034bc38de755016d3858e23ee438abcfc80b;p=thirdparty%2Fgcc.git re PR target/44141 (Redundant loads and stores generated for AMD bdver1 target) PR target/44141 * config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle 128 bit vectors specially for TARGET_AVX. Emit sse2_movupd and sse_movupd RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or when optimizing for size. * config/i386/sse.md (*mov<mode>_internal): Remove TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code. Calculate "mode" attribute according to optimize_function_for_size_p and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag. (*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template depending on the mode of the instruction. Calculate "mode" attribute according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags. (*<sse2>_movdqu<avxsizesuffix>): Ditto. From-SVN: r187347 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f32e96f76b6b..0d17b2a2e03e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2012-05-09 Uros Bizjak + + PR target/44141 + * config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle + 128 bit vectors specially for TARGET_AVX. Emit sse2_movupd and + sse_movupd RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + or when optimizing for size. + * config/i386/sse.md (*mov<mode>_internal): Remove + TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code. + Calculate "mode" attribute according to optimize_function_for_size_p + and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag. + (*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template + depending on the mode of the instruction.
Calculate "mode" attribute + according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES + and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags. + (*<sse2>_movdqu<avxsizesuffix>): Ditto. + 2012-05-09 Georg-Johann Lay PR target/53256 @@ -161,7 +178,7 @@ PR target/51244 * config/sh/sh.md (*branch_true, *branch_false): New insns. -2012-05-08 Teresa Johnson +2012-05-08 Teresa Johnson * gcov-io.h (__gcov_reset, __gcov_dump): Declare. * doc/gcov.texi: Add note on using __gcov_reset and __gcov_dump. @@ -180,8 +197,7 @@ (clone_function_name): Likewise. (cgraph_create_virtual_clone): Likewise. (cgraph_remove_node_and_inline_clones): Likewise. - (cgraph_redirect_edge_call_stmt_to_callee): Move here from - cgraphunit.c + (cgraph_redirect_edge_call_stmt_to_callee): Move here from cgraphunit.c * cgraph.h: Reorder declarations so they match file of origin. (cgraph_create_empty_node): Declare. * cgraphunit.c (update_call_expr): Move to cgraphclones.c @@ -702,7 +718,7 @@ Enable -Wunused-local-typedefs when -Wall or -Wunused is on * opts.c (finish_options): Activate -Wunused-local-typedefs if - -Wunused is activated. + -Wunused is activated. * doc/invoke.texi: Update blurb of -Wunused-local-typedefs. 2012-05-04 Andreas Krebbel @@ -1757,7 +1773,7 @@ * config/pa/pa.c (pa_legitimate_constant_p): Don't put function labels in constant pool. -2012-04-27 Ollie Wild +2012-04-27 Ollie Wild * doc/invoke.texi (Wliteral-suffix): Document new option.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6bb64e085232..36370b2f6d1c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15907,60 +15907,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) op0 = operands[0]; op1 = operands[1]; - if (TARGET_AVX) + if (TARGET_AVX + && GET_MODE_SIZE (mode) == 32) { switch (GET_MODE_CLASS (mode)) { case MODE_VECTOR_INT: case MODE_INT: - switch (GET_MODE_SIZE (mode)) - { - case 16: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - } - else - { - op0 = gen_lowpart (V16QImode, op0); - op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); - } - break; - case 32: - op0 = gen_lowpart (V32QImode, op0); - op1 = gen_lowpart (V32QImode, op1); - ix86_avx256_split_vector_move_misalign (op0, op1); - break; - default: - gcc_unreachable (); - } - break; + op0 = gen_lowpart (V32QImode, op0); + op1 = gen_lowpart (V32QImode, op1); + /* FALLTHRU */ + case MODE_VECTOR_FLOAT: - switch (mode) - { - case V4SFmode: - emit_insn (gen_sse_movups (op0, op1)); - break; - case V2DFmode: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - } - else - emit_insn (gen_sse2_movupd (op0, op1)); - break; - case V8SFmode: - case V4DFmode: - ix86_avx256_split_vector_move_misalign (op0, op1); - break; - default: - gcc_unreachable (); - } + ix86_avx256_split_vector_move_misalign (op0, op1); break; default: @@ -15972,16 +15931,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (MEM_P (op1)) { - /* If we're optimizing for size, movups is the smallest. 
*/ - if (optimize_insn_for_size_p () - || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - return; - } - /* ??? If we have typed data, then it would appear that using movdqu is the only way to get unaligned data loaded with integer type. */ @@ -15989,16 +15938,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); + /* We will eventually emit movups based on insn attributes. */ emit_insn (gen_sse2_movdqu (op0, op1)); - return; } - - if (TARGET_SSE2 && mode == V2DFmode) + else if (TARGET_SSE2 && mode == V2DFmode) { rtx zero; - if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_function_for_size_p (cfun)) { + /* We will eventually emit movups based on insn attributes. */ emit_insn (gen_sse2_movupd (op0, op1)); return; } @@ -16030,7 +15982,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) } else { - if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_function_for_size_p (cfun)) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); @@ -16045,6 +16000,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (mode != V4SFmode) op0 = gen_lowpart (V4SFmode, op0); + m = adjust_address (op1, V2SFmode, 0); emit_insn (gen_sse_loadlps (op0, op0, m)); m = adjust_address (op1, V2SFmode, 8); @@ -16053,30 +16009,20 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) } else if (MEM_P (op0)) { - /* If we're optimizing for size, movups is the smallest. 
*/ - if (optimize_insn_for_size_p () - || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - return; - } - - /* ??? Similar to above, only less clear - because of typeless stores. */ - if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES - && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) { op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); + /* We will eventually emit movups based on insn attributes. */ emit_insn (gen_sse2_movdqu (op0, op1)); - return; } - - if (TARGET_SSE2 && mode == V2DFmode) + else if (TARGET_SSE2 && mode == V2DFmode) { - if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_STORE_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_function_for_size_p (cfun)) + /* We will eventually emit movups based on insn attributes. */ emit_insn (gen_sse2_movupd (op0, op1)); else { @@ -16091,7 +16037,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (mode != V4SFmode) op1 = gen_lowpart (V4SFmode, op1); - if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_STORE_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_function_for_size_p (cfun)) { op0 = gen_lowpart (V4SFmode, op0); emit_insn (gen_sse_movups (op0, op1)); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d270c634ae00..86b2ed39f0aa 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -449,8 +449,6 @@ && (misaligned_operand (operands[0], mode) || misaligned_operand (operands[1], mode))) return "vmovupd\t{%1, %0|%0, %1}"; - else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; else return "%vmovapd\t{%1, %0|%0, %1}"; @@ -460,8 +458,6 @@ && (misaligned_operand (operands[0], mode) || misaligned_operand (operands[1], mode))) return "vmovdqu\t{%1, 
%0|%0, %1}"; - else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; else return "%vmovdqa\t{%1, %0|%0, %1}"; @@ -475,19 +471,21 @@ [(set_attr "type" "sselog1,ssemov,ssemov") (set_attr "prefix" "maybe_vex") (set (attr "mode") - (cond [(match_test "TARGET_AVX") + (cond [(and (eq_attr "alternative" "1,2") + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) + (if_then_else + (match_test "GET_MODE_SIZE (mode) > 16") + (const_string "V8SF") + (const_string "V4SF")) + (match_test "TARGET_AVX") (const_string "") - (ior (ior (match_test "optimize_function_for_size_p (cfun)") - (not (match_test "TARGET_SSE2"))) + (ior (and (eq_attr "alternative" "1,2") + (match_test "optimize_function_for_size_p (cfun)")) (and (eq_attr "alternative" "2") (match_test "TARGET_SSE_TYPELESS_STORES"))) (const_string "V4SF") - (eq (const_string "mode") (const_string "V4SFmode")) - (const_string "V4SF") - (eq (const_string "mode") (const_string "V2DFmode")) - (const_string "V2DF") ] - (const_string "TI")))]) + (const_string "")))]) (define_insn "sse2_movq128" [(set (match_operand:V2DI 0 "register_operand" "=x") @@ -597,11 +595,33 @@ [(match_operand:VF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "%vmovu\t{%1, %0|%0, %1}" +{ + switch (get_attr_mode (insn)) + { + case MODE_V8SF: + case MODE_V4SF: + return "%vmovups\t{%1, %0|%0, %1}"; + default: + return "%vmovu\t{%1, %0|%0, %1}"; + } +} [(set_attr "type" "ssemov") (set_attr "movu" "1") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "")]) + (set (attr "mode") + (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (if_then_else + (match_test "GET_MODE_SIZE (mode) > 16") + (const_string "V8SF") + (const_string "V4SF")) + (match_test "TARGET_AVX") + (const_string "") + (ior (match_test "optimize_function_for_size_p (cfun)") + (and (eq_attr "alternative" "1") + (match_test "TARGET_SSE_TYPELESS_STORES"))) + (const_string "V4SF") + ] + 
(const_string "")))]) (define_expand "_movdqu" [(set (match_operand:VI1 0 "nonimmediate_operand") @@ -618,7 +638,16 @@ (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "%vmovdqu\t{%1, %0|%0, %1}" +{ + switch (get_attr_mode (insn)) + { + case MODE_V8SF: + case MODE_V4SF: + return "%vmovups\t{%1, %0|%0, %1}"; + default: + return "%vmovdqu\t{%1, %0|%0, %1}"; + } +} [(set_attr "type" "ssemov") (set_attr "movu" "1") (set (attr "prefix_data16") @@ -627,7 +656,20 @@ (const_string "*") (const_string "1"))) (set_attr "prefix" "maybe_vex") - (set_attr "mode" "")]) + (set (attr "mode") + (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (if_then_else + (match_test "GET_MODE_SIZE (mode) > 16") + (const_string "V8SF") + (const_string "V4SF")) + (match_test "TARGET_AVX") + (const_string "") + (ior (match_test "optimize_function_for_size_p (cfun)") + (and (eq_attr "alternative" "1") + (match_test "TARGET_SSE_TYPELESS_STORES"))) + (const_string "V4SF") + ] + (const_string "")))]) (define_insn "_lddqu" [(set (match_operand:VI1 0 "register_operand" "=x")