From: Jakub Jelinek Date: Sat, 4 Jan 2014 09:57:36 +0000 (+0100) Subject: sse.md (avx512f_load<mode>_mask): Emit vmovup{s,d} or vmovdqu* for misaligned_operand. X-Git-Tag: releases/gcc-4.9.0~1838 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=dad5ed2eb152e85ae2ebb3582d57d895488011a4;p=thirdparty%2Fgcc.git sse.md (avx512f_load<mode>_mask): Emit vmovup{s,d} or vmovdqu* for misaligned_operand. * config/i386/sse.md (avx512f_load<mode>_mask): Emit vmovup{s,d} or vmovdqu* for misaligned_operand. (<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>, <sse2_avx_avx512f>_loaddqu<mode><mask_name>): Handle <mask_applied>. * config/i386/i386.c (ix86_expand_special_args_builtin): Set aligned_mem for AVX512F masked aligned load and store builtins and for non-temporal moves. * gcc.target/i386/avx512f-vmovdqu32-1.c: Allow vmovdqu64 instead of vmovdqu32. From-SVN: r206332 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cf4cfabeb820..caae1f6f473a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,4 +1,14 @@ -2014-01-03 Bingfeng Mei +2014-01-04 Jakub Jelinek + + * config/i386/sse.md (avx512f_load<mode>_mask): Emit vmovup{s,d} + or vmovdqu* for misaligned_operand. + (<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>, + <sse2_avx_avx512f>_loaddqu<mode><mask_name>): Handle <mask_applied>. + * config/i386/i386.c (ix86_expand_special_args_builtin): Set + aligned_mem for AVX512F masked aligned load and store builtins and for + non-temporal moves. 
+ +2014-01-03 Bingfeng Mei PR tree-optimization/59651 * tree-vect-loop-manip.c (vect_create_cond_for_alias_checks): diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d2f5b6e9fda7..1fc68e144bbe 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -34407,6 +34407,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case CODE_FOR_sse2_movntidi: case CODE_FOR_sse_movntq: case CODE_FOR_sse2_movntisi: + case CODE_FOR_avx512f_movntv16sf: + case CODE_FOR_avx512f_movntv8df: + case CODE_FOR_avx512f_movntv8di: aligned_mem = true; break; default: @@ -34431,6 +34434,24 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, klass = load; memory = 0; break; + case VOID_FTYPE_PV8DF_V8DF_QI: + case VOID_FTYPE_PV16SF_V16SF_HI: + case VOID_FTYPE_PV8DI_V8DI_QI: + case VOID_FTYPE_PV16SI_V16SI_HI: + switch (icode) + { + /* These builtins and instructions require the memory + to be properly aligned. */ + case CODE_FOR_avx512f_storev16sf_mask: + case CODE_FOR_avx512f_storev16si_mask: + case CODE_FOR_avx512f_storev8df_mask: + case CODE_FOR_avx512f_storev8di_mask: + aligned_mem = true; + break; + default: + break; + } + /* FALLTHRU */ case VOID_FTYPE_PV8SF_V8SI_V8SF: case VOID_FTYPE_PV4DF_V4DI_V4DF: case VOID_FTYPE_PV4SF_V4SI_V4SF: @@ -34439,10 +34460,6 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PV4DI_V4DI_V4DI: case VOID_FTYPE_PV4SI_V4SI_V4SI: case VOID_FTYPE_PV2DI_V2DI_V2DI: - case VOID_FTYPE_PV8DF_V8DF_QI: - case VOID_FTYPE_PV16SF_V16SF_HI: - case VOID_FTYPE_PV8DI_V8DI_QI: - case VOID_FTYPE_PV16SI_V16SI_HI: case VOID_FTYPE_PDOUBLE_V2DF_QI: case VOID_FTYPE_PFLOAT_V4SF_QI: nargs = 2; @@ -34459,6 +34476,19 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, nargs = 3; klass = load; memory = 0; + switch (icode) + { + /* These builtins and instructions require the memory + to be properly aligned. 
*/ + case CODE_FOR_avx512f_loadv16sf_mask: + case CODE_FOR_avx512f_loadv16si_mask: + case CODE_FOR_avx512f_loadv8df_mask: + case CODE_FOR_avx512f_loadv8di_mask: + aligned_mem = true; + break; + default: + break; + } break; case VOID_FTYPE_UINT_UINT_UINT: case VOID_FTYPE_UINT64_UINT_UINT: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 405f9988d9bf..dfc98ba813ad 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -786,8 +786,12 @@ { case MODE_V8DF: case MODE_V16SF: + if (misaligned_operand (operands[1], mode)) + return "vmovu\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; return "vmova\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; default: + if (misaligned_operand (operands[1], mode)) + return "vmovdqu\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; return "vmovdqa\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; } } @@ -936,11 +940,14 @@ false, still emit UNSPEC_LOADU insn to honor user's request for misaligned load. */ if (TARGET_AVX - && misaligned_operand (operands[1], mode) - /* FIXME: Revisit after AVX512F merge is completed. */ - && !) + && misaligned_operand (operands[1], mode)) { - emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + rtx src = operands[1]; + if () + src = gen_rtx_VEC_MERGE (mode, operands[1], + operands[2 * ], + operands[3 * ]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], src)); DONE; } }) @@ -1046,11 +1053,14 @@ false, still emit UNSPEC_LOADU insn to honor user's request for misaligned load. */ if (TARGET_AVX - && misaligned_operand (operands[1], mode) - /* FIXME: Revisit after AVX512F merge is completed. */ - && !) 
+ && misaligned_operand (operands[1], mode)) { - emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + rtx src = operands[1]; + if () + src = gen_rtx_VEC_MERGE (mode, operands[1], + operands[2 * ], + operands[3 * ]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], src)); DONE; } }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8a9c0cbaf0e5..267bcc0c7c60 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2014-01-04 Jakub Jelinek + + * gcc.target/i386/avx512f-vmovdqu32-1.c: Allow vmovdqu64 instead of + vmovdqu32. + 2014-01-04 Janus Weil PR fortran/59547 diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c index b8af781834e8..79dbf9dd37a0 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[36\]\[24\]\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\[^\{\]" 1 } } */