From: Jakub Jelinek
Date: Wed, 4 Dec 2013 11:11:24 +0000 (+0100)
Subject: re PR target/59163 (program compiled with g++ -O3 segfaults)
X-Git-Tag: releases/gcc-4.9.0~2295
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f220a4f4e556777fdfbca7b41cda3f234a2507da;p=thirdparty%2Fgcc.git

re PR target/59163 (program compiled with g++ -O3 segfaults)

        PR target/59163
        * config/i386/i386.c (ix86_legitimate_combined_insn): If for
        !TARGET_AVX there is misaligned MEM operand with vector mode
        and get_attr_ssememalign is 0, return false.
        (ix86_expand_special_args_builtin): Add get_pointer_alignment
        computed alignment and for non-temporal loads/stores also
        at least GET_MODE_ALIGNMENT as MEM_ALIGN.
        * config/i386/sse.md (_loadu, _storeu, _loaddqu, _storedqu,
        _lddqu, sse_vmrcpv4sf2, sse_vmrsqrtv4sf2, sse2_cvtdq2pd,
        sse_movhlps, sse_movlhps, sse_storehps, sse_loadhps, sse_loadlps,
        *vec_interleave_highv2df, *vec_interleave_lowv2df,
        *vec_extractv2df_1_sse, sse2_movsd, sse4_1_v8qiv8hi2,
        sse4_1_v4qiv4si2, sse4_1_v4hiv4si2, sse4_1_v2qiv2di2,
        sse4_1_v2hiv2di2, sse4_1_v2siv2di2, sse4_2_pcmpestr,
        *sse4_2_pcmpestr_unaligned, sse4_2_pcmpestri, sse4_2_pcmpestrm,
        sse4_2_pcmpestr_cconly, sse4_2_pcmpistr, *sse4_2_pcmpistr_unaligned,
        sse4_2_pcmpistri, sse4_2_pcmpistrm, sse4_2_pcmpistr_cconly): Add
        ssememalign attribute.
        * config/i386/i386.md (ssememalign): New define_attr.

        * g++.dg/torture/pr59163.C: New test.

Co-Authored-By: Uros Bizjak

From-SVN: r205661
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 92a8651c5629..56b4f477f785 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2013-12-04  Jakub Jelinek
+            Uros Bizjak
+
+       PR target/59163
+       * config/i386/i386.c (ix86_legitimate_combined_insn): If for
+       !TARGET_AVX there is misaligned MEM operand with vector mode
+       and get_attr_ssememalign is 0, return false.
+       (ix86_expand_special_args_builtin): Add get_pointer_alignment
+       computed alignment and for non-temporal loads/stores also
+       at least GET_MODE_ALIGNMENT as MEM_ALIGN.
+       * config/i386/sse.md
+       (_loadu,
+       _storeu,
+       _loaddqu,
+       _storedqu, _lddqu,
+       sse_vmrcpv4sf2, sse_vmrsqrtv4sf2, sse2_cvtdq2pd, sse_movhlps,
+       sse_movlhps, sse_storehps, sse_loadhps, sse_loadlps,
+       *vec_interleave_highv2df, *vec_interleave_lowv2df,
+       *vec_extractv2df_1_sse, sse2_movsd, sse4_1_v8qiv8hi2,
+       sse4_1_v4qiv4si2, sse4_1_v4hiv4si2,
+       sse4_1_v2qiv2di2, sse4_1_v2hiv2di2,
+       sse4_1_v2siv2di2, sse4_2_pcmpestr, *sse4_2_pcmpestr_unaligned,
+       sse4_2_pcmpestri, sse4_2_pcmpestrm, sse4_2_pcmpestr_cconly,
+       sse4_2_pcmpistr, *sse4_2_pcmpistr_unaligned, sse4_2_pcmpistri,
+       sse4_2_pcmpistrm, sse4_2_pcmpistr_cconly): Add ssememalign attribute.
+       * config/i386/i386.md (ssememalign): New define_attr.
+
 2013-12-04  Jakub Jelinek
 
        PR tree-optimization/59355
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 21963bb28b71..f9b06d13b89b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5733,6 +5733,17 @@ ix86_legitimate_combined_insn (rtx insn)
          bool win;
          int j;
 
+         /* For pre-AVX disallow unaligned loads/stores where the
+            instructions don't support it.  */
+         if (!TARGET_AVX
+             && VECTOR_MODE_P (GET_MODE (op))
+             && misaligned_operand (op, GET_MODE (op)))
+           {
+             int min_align = get_attr_ssememalign (insn);
+             if (min_align == 0)
+               return false;
+           }
+
          /* A unary operator may be accepted by the predicate, but it
             is irrelevant for matching constraints.  */
          if (UNARY_P (op))
@@ -32481,11 +32492,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
 
 static rtx
 ix86_expand_special_args_builtin (const struct builtin_description *d,
-                                 tree exp, rtx target)
+                                 tree exp, rtx target)
 {
   tree arg;
   rtx pat, op;
   unsigned int i, nargs, arg_adjust, memory;
+  bool aligned_mem = false;
   struct
     {
      rtx op;
@@ -32531,6 +32543,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
       nargs = 1;
       klass = load;
       memory = 0;
+      switch (icode)
+       {
+       case CODE_FOR_sse4_1_movntdqa:
+       case CODE_FOR_avx2_movntdqa:
+         aligned_mem = true;
+         break;
+       default:
+         break;
+       }
       break;
     case VOID_FTYPE_PV2SF_V4SF:
     case VOID_FTYPE_PV4DI_V4DI:
@@ -32548,6 +32569,26 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
       klass = store;
       /* Reserve memory operand for target.  */
       memory = ARRAY_SIZE (args);
+      switch (icode)
+       {
+       /* These builtins and instructions require the memory
+          to be properly aligned.  */
+       case CODE_FOR_avx_movntv4di:
+       case CODE_FOR_sse2_movntv2di:
+       case CODE_FOR_avx_movntv8sf:
+       case CODE_FOR_sse_movntv4sf:
+       case CODE_FOR_sse4a_vmmovntv4sf:
+       case CODE_FOR_avx_movntv4df:
+       case CODE_FOR_sse2_movntv2df:
+       case CODE_FOR_sse4a_vmmovntv2df:
+       case CODE_FOR_sse2_movntidi:
+       case CODE_FOR_sse_movntq:
+       case CODE_FOR_sse2_movntisi:
+         aligned_mem = true;
+         break;
+       default:
+         break;
+       }
       break;
     case V4SF_FTYPE_V4SF_PCV2SF:
     case V2DF_FTYPE_V2DF_PCDOUBLE:
@@ -32604,6 +32645,17 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
        {
          op = ix86_zero_extend_to_Pmode (op);
          target = gen_rtx_MEM (tmode, op);
+         /* target at this point has just BITS_PER_UNIT MEM_ALIGN
+            on it.  Try to improve it using get_pointer_alignment,
+            and if the special builtin is one that requires strict
+            mode alignment, also from its GET_MODE_ALIGNMENT.
+            Failure to do so could lead to ix86_legitimate_combined_insn
+            rejecting all changes to such insns.  */
+         unsigned int align = get_pointer_alignment (arg);
+         if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
+           align = GET_MODE_ALIGNMENT (tmode);
+         if (MEM_ALIGN (target) < align)
+           set_mem_align (target, align);
        }
       else
        target = force_reg (tmode, op);
@@ -32649,8 +32701,17 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
          /* This must be the memory operand.  */
          op = ix86_zero_extend_to_Pmode (op);
          op = gen_rtx_MEM (mode, op);
-         gcc_assert (GET_MODE (op) == mode
-                     || GET_MODE (op) == VOIDmode);
+         /* op at this point has just BITS_PER_UNIT MEM_ALIGN
+            on it.  Try to improve it using get_pointer_alignment,
+            and if the special builtin is one that requires strict
+            mode alignment, also from its GET_MODE_ALIGNMENT.
+            Failure to do so could lead to ix86_legitimate_combined_insn
+            rejecting all changes to such insns.  */
+         unsigned int align = get_pointer_alignment (arg);
+         if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
+           align = GET_MODE_ALIGNMENT (mode);
+         if (MEM_ALIGN (op) < align)
+           set_mem_align (op, align);
        }
       else
        {
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6976124d4a89..0c732c7689f6 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -402,6 +402,13 @@
              (const_string "unknown")]
         (const_string "integer")))
 
+;; The minimum required alignment of vector mode memory operands of the SSE
+;; (non-VEX/EVEX) instruction in bits, if it is different from
+;; GET_MODE_ALIGNMENT of the operand, otherwise 0.  If an instruction has
+;; multiple alternatives, this should be the conservative maximum of those
+;; minimum required alignments.
+(define_attr "ssememalign" "" (const_int 0))
+
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
   (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7a47f27e94ee..f4efeaa31226 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -931,6 +931,7 @@
 }
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
        (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
@@ -961,6 +962,7 @@
 }
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
        (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
@@ -1020,6 +1022,7 @@
 }
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
    (set (attr "prefix_data16")
      (if_then_else
        (match_test "TARGET_AVX")
@@ -1059,6 +1062,7 @@
 }
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
    (set (attr "prefix_data16")
      (if_then_else
        (match_test "TARGET_AVX")
@@ -1105,6 +1109,7 @@
   "%vlddqu\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
    (set (attr "prefix_data16")
      (if_then_else
        (match_test "TARGET_AVX")
@@ -1369,6 +1374,7 @@
    vrcpss\t{%1, %2, %0|%0, %2, %k1}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
+   (set_attr "ssememalign" "32")
    (set_attr "atom_sse_attr" "rcp")
    (set_attr "btver2_sse_attr" "rcp")
    (set_attr "prefix" "orig,vex")
@@ -1509,6 +1515,7 @@
    vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
+   (set_attr "ssememalign" "32")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "SF")])
 
@@ -3853,6 +3860,7 @@
   "%vcvtdq2pd\t{%1, %0|%0, %q1}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "maybe_vex")
+   (set_attr "ssememalign" "64")
    (set_attr "mode" "V2DF")])
 
 (define_insn "avx512f_cvtpd2dq512"
@@ -4725,6 +4733,7 @@
    %vmovhps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
    (set_attr "type" "ssemov")
+   (set_attr "ssememalign" "64")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
@@ -4770,6 +4779,7 @@
    %vmovlps\t{%2, %H0|%H0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
    (set_attr "type" "ssemov")
+   (set_attr "ssememalign" "64")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
@@ -5174,6 +5184,7 @@
    %vmovhlps\t{%1, %d0|%d0, %1}
    %vmovlps\t{%H1, %d0|%d0, %H1}"
   [(set_attr "type" "ssemov")
+   (set_attr "ssememalign" "64")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
@@ -5213,6 +5224,7 @@
    %vmovlps\t{%2, %H0|%H0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
    (set_attr "type" "ssemov")
+   (set_attr "ssememalign" "64")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
 
@@ -5266,6 +5278,7 @@
    %vmovlps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
    (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
+   (set_attr "ssememalign" "64")
    (set_attr "length_immediate" "1,1,*,*,*")
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
@@ -6224,7 +6237,8 @@
    vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
    %vmovhpd\t{%1, %0|%q0, %1}"
   [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
-   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
"type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") + (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") + (set_attr "ssememalign" "64") (set_attr "prefix_data16" "*,*,*,1,*,1") (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) @@ -6368,6 +6382,7 @@ %vmovlpd\t{%2, %H0|%H0, %2}" [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") + (set_attr "ssememalign" "64") (set_attr "prefix_data16" "*,*,*,1,*,1") (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) @@ -6959,6 +6974,7 @@ movhlps\t{%1, %0|%0, %1} movlps\t{%H1, %0|%0, %H1}" [(set_attr "type" "ssemov") + (set_attr "ssememalign" "64") (set_attr "mode" "V2SF,V4SF,V2SF")]) ;; Avoid combining registers from different units in a single alternative, @@ -7051,6 +7067,7 @@ #" [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") + (set_attr "ssememalign" "64") (set_attr "prefix_data16" "1,*,*,*,*,*,*") (set_attr "prefix" "orig,vex,orig,vex,*,*,*") (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")]) @@ -7119,6 +7136,7 @@ (const_string "imov") ] (const_string "ssemov"))) + (set_attr "ssememalign" "64") (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*") (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*") (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*") @@ -7163,6 +7181,7 @@ (const_string "1") (const_string "*"))) (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*") + (set_attr "ssememalign" "64") (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex") (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) @@ -11459,6 +11478,7 @@ "TARGET_SSE4_1" "%vpmovbw\t{%1, %0|%0, %q1}" [(set_attr "type" "ssemov") + (set_attr "ssememalign" "64") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -11499,6 +11519,7 @@ "TARGET_SSE4_1" "%vpmovbd\t{%1, %0|%0, %k1}" [(set_attr "type" "ssemov") + (set_attr "ssememalign" "32") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -11534,6 +11555,7 @@ "TARGET_SSE4_1" "%vpmovwd\t{%1, %0|%0, %q1}" [(set_attr "type" "ssemov") + (set_attr "ssememalign" "64") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -11576,6 +11598,7 @@ "TARGET_SSE4_1" "%vpmovbq\t{%1, %0|%0, %w1}" [(set_attr "type" "ssemov") + (set_attr "ssememalign" "16") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -11613,6 +11636,7 @@ "TARGET_SSE4_1" "%vpmovwq\t{%1, %0|%0, %k1}" [(set_attr "type" "ssemov") + (set_attr "ssememalign" "32") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -11646,6 +11670,7 @@ "TARGET_SSE4_1" "%vpmovdq\t{%1, %0|%0, %q1}" [(set_attr "type" "ssemov") + (set_attr "ssememalign" "64") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -11939,6 +11964,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "memory" "none,load") (set_attr "mode" "TI")]) @@ -12001,6 +12027,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "memory" "load") (set_attr "mode" "TI")]) @@ -12028,6 +12055,7 @@ (set_attr 
"prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "btver2_decode" "vector") (set_attr "memory" "none,load") @@ -12055,6 +12083,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "btver2_decode" "vector") @@ -12081,6 +12110,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "memory" "none,load,none,load") (set_attr "btver2_decode" "vector,vector,vector,vector") @@ -12134,6 +12164,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "memory" "none,load") (set_attr "mode" "TI")]) @@ -12187,6 +12218,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "memory" "load") (set_attr "mode" "TI")]) @@ -12209,6 +12241,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "memory" "none,load") @@ -12233,6 +12266,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") (set_attr "memory" "none,load") @@ -12257,6 +12291,7 @@ [(set_attr "type" "sselog") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") + (set_attr "ssememalign" "8") (set_attr "length_immediate" "1") (set_attr "memory" "none,load,none,load") (set_attr "prefix" "maybe_vex") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e35854179d2f..32b4ff41e3e2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,8 @@ 2013-12-04 Jakub Jelinek + PR target/59163 + * g++.dg/torture/pr59163.C: New test. + PR tree-optimization/59355 * g++.dg/ipa/pr59355.C: New test. diff --git a/gcc/testsuite/g++.dg/torture/pr59163.C b/gcc/testsuite/g++.dg/torture/pr59163.C new file mode 100644 index 000000000000..2f9a99970781 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr59163.C @@ -0,0 +1,30 @@ +// PR target/59163 +// { dg-do run } + +struct A { float a[4]; }; +struct B { int b; A a; }; + +__attribute__((noinline, noclone)) void +bar (A &a) +{ + if (a.a[0] != 36.0f || a.a[1] != 42.0f || a.a[2] != 48.0f || a.a[3] != 54.0f) + __builtin_abort (); +} + +__attribute__((noinline, noclone)) void +foo (A &a) +{ + int i; + A c = a; + for (i = 0; i < 4; i++) + c.a[i] *= 6.0f; + a = c; + bar (a); +} + +int +main () +{ + B b = { 5, { 6, 7, 8, 9 } }; + foo (b.a); +}