From 114627e2fda58a70828c0fb93d99feb079741a38 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 3 Jun 2015 11:03:26 +0000 Subject: [PATCH] Backport PRs 62031, 65518, 65549, 66123, 66251, 66272 2015-06-03 Richard Biener Backport from mainline 2015-05-26 Michael Matz PR middle-end/66251 * tree-vect-stmts.c (vect_create_vectorized_demotion_stmts): Always set STMT_VINFO_VEC_STMT, also with SLP. * gcc.dg/vect/pr66251.c: New test. 2015-05-22 Richard Biener PR tree-optimization/66251 * tree-vect-stmts.c (vectorizable_conversion): Properly set STMT_VINFO_VEC_STMT even for the SLP case. * gfortran.fortran-torture/compile/pr66251.f90: New testcase. 2015-05-27 Richard Biener PR tree-optimization/66272 Revert parts of 2014-08-15 Richard Biener PR tree-optimization/62031 * tree-data-ref.c (dr_analyze_indices): Do not set DR_UNCONSTRAINED_BASE. (dr_may_alias_p): All indirect accesses have to go the formerly DR_UNCONSTRAINED_BASE path. * tree-data-ref.h (struct indices): Remove unconstrained_base member. (DR_UNCONSTRAINED_BASE): Remove. * gcc.dg/torture/pr66272.c: New testcase. 2015-05-13 Richard Biener PR tree-optimization/66123 * tree-ssa-dom.c (propagate_rhs_into_lhs): Check if we found a taken edge. * gcc.dg/torture/pr66123.c: New testcase. 2015-06-02 Richard Biener PR debug/65549 * dwarf2out.c (lookup_context_die): New function. (resolve_addr): Avoid forcing a full DIE for the target of a DW_TAG_GNU_call_site during late compilation. Instead create a stub DIE without a type if we have a context DIE present. * g++.dg/lto/pr65549_0.C: New testcase. 2015-03-23 Richard Biener PR tree-optimization/65518 * tree-vect-stmts.c (vectorizable_load): Reject single-element interleaving cases we generate abysmal code for. * gcc.dg/vect/pr65518.c: New testcase. 
From-SVN: r224072 --- gcc/ChangeLog | 51 +++++++ gcc/dwarf2out.c | 38 ++++- gcc/testsuite/ChangeLog | 33 ++++ gcc/testsuite/g++.dg/lto/pr65549_0.C | 144 ++++++++++++++++++ gcc/testsuite/gcc.dg/torture/pr66123.c | 11 ++ gcc/testsuite/gcc.dg/torture/pr66272.c | 23 +++ gcc/testsuite/gcc.dg/vect/pr65518.c | 43 ++++++ gcc/testsuite/gcc.dg/vect/pr66251.c | 79 ++++++++++ .../compile/pr66251.f90 | 7 + gcc/tree-data-ref.c | 7 +- gcc/tree-data-ref.h | 5 + gcc/tree-ssa-dom.c | 3 + gcc/tree-vect-stmts.c | 40 +++-- 13 files changed, 465 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/g++.dg/lto/pr65549_0.C create mode 100644 gcc/testsuite/gcc.dg/torture/pr66123.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr66272.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr65518.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr66251.c create mode 100644 gcc/testsuite/gfortran.fortran-torture/compile/pr66251.f90 diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9984a4c6b314..9a5def97e439 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,54 @@ +2015-06-03 Richard Biener + + Backport from mainline + 2015-05-26 Michael Matz + + PR middle-end/66251 + * tree-vect-stmts.c (vect_create_vectorized_demotion_stmts): Always set + STMT_VINFO_VEC_STMT, also with SLP. + + 2015-05-22 Richard Biener + + PR tree-optimization/66251 + * tree-vect-stmts.c (vectorizable_conversion): Properly + set STMT_VINFO_VEC_STMT even for the SLP case. + + 2015-05-27 Richard Biener + + PR tree-optimization/66272 + Revert parts of + 2014-08-15 Richard Biener + + PR tree-optimization/62031 + * tree-data-ref.c (dr_analyze_indices): Do not set + DR_UNCONSTRAINED_BASE. + (dr_may_alias_p): All indirect accesses have to go the + formerly DR_UNCONSTRAINED_BASE path. + * tree-data-ref.h (struct indices): Remove + unconstrained_base member. + (DR_UNCONSTRAINED_BASE): Remove. 
+ + 2015-05-13 Richard Biener + + PR tree-optimization/66123 + * tree-ssa-dom.c (propagate_rhs_into_lhs): Check if we found + a taken edge. + + 2015-06-02 Richard Biener + + PR debug/65549 + * dwarf2out.c (lookup_context_die): New function. + (resolve_addr): Avoid forcing a full DIE for the + target of a DW_TAG_GNU_call_site during late compilation. + Instead create a stub DIE without a type if we have a + context DIE present. + + 2015-03-23 Richard Biener + + PR tree-optimization/65518 + * tree-vect-stmts.c (vectorizable_load): Reject single-element + interleaving cases we generate absymal code for. + 2015-06-01 Dominik Vogt Backport from mainline diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c index c7d7181baede..6543d81e93cd 100644 --- a/gcc/dwarf2out.c +++ b/gcc/dwarf2out.c @@ -20099,6 +20099,28 @@ is_naming_typedef_decl (const_tree decl) != TYPE_NAME (TREE_TYPE (decl)))); } +/* Looks up the DIE for a context. */ + +static inline dw_die_ref +lookup_context_die (tree context) +{ + if (context) + { + /* Find die that represents this context. */ + if (TYPE_P (context)) + { + context = TYPE_MAIN_VARIANT (context); + dw_die_ref ctx = lookup_type_die (context); + if (!ctx) + return NULL; + return strip_naming_typedef (context, ctx); + } + else + return lookup_decl_die (context); + } + return comp_unit_die (); +} + /* Returns the DIE for a context. 
*/ static inline dw_die_ref @@ -23404,12 +23426,22 @@ resolve_addr (dw_die_ref die) { tree tdecl = SYMBOL_REF_DECL (a->dw_attr_val.v.val_addr); dw_die_ref tdie = lookup_decl_die (tdecl); + dw_die_ref cdie; if (tdie == NULL && DECL_EXTERNAL (tdecl) - && DECL_ABSTRACT_ORIGIN (tdecl) == NULL_TREE) + && DECL_ABSTRACT_ORIGIN (tdecl) == NULL_TREE + && (cdie = lookup_context_die (DECL_CONTEXT (tdecl)))) { - force_decl_die (tdecl); - tdie = lookup_decl_die (tdecl); + /* Creating a full DIE for tdecl is overly expensive and + at this point even wrong when in the LTO phase + as it can end up generating new type DIEs we didn't + output and thus optimize_external_refs will crash. */ + tdie = new_die (DW_TAG_subprogram, cdie, NULL_TREE); + add_AT_flag (tdie, DW_AT_external, 1); + add_AT_flag (tdie, DW_AT_declaration, 1); + add_linkage_attr (tdie, tdecl); + add_name_and_src_coords_attributes (tdie, tdecl); + equate_decl_number_to_die (tdecl, tdie); } if (tdie) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2fb4f836bbe7..057b418cf239 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,36 @@ +2015-06-03 Richard Biener + + Backport from mainline + 2015-05-26 Michael Matz + + PR middle-end/66251 + * gcc.dg/vect/pr66251.c: New test. + + 2015-05-22 Richard Biener + + PR tree-optimization/66251 + * gfortran.fortran-torture/compile/pr66251.f90: New testcase. + + 2015-05-27 Richard Biener + + PR tree-optimization/66272 + * gcc.dg/torture/pr66272.c: New testcase. + + 2015-05-13 Richard Biener + + PR tree-optimization/66123 + * gcc.dg/torture/pr66123.c: New testcase. + + 2015-06-02 Richard Biener + + PR debug/65549 + * g++.dg/lto/pr65549_0.C: New testcase. + + 2015-03-23 Richard Biener + + PR tree-optimization/65518 + * gcc.dg/vect/pr65518.c: New testcase. 
+ 2015-06-01 Jakub Jelinek * gcc.target/s390/hotpatch-compile-15.c: Remove dg-prune-output diff --git a/gcc/testsuite/g++.dg/lto/pr65549_0.C b/gcc/testsuite/g++.dg/lto/pr65549_0.C new file mode 100644 index 000000000000..4c6358dc7b90 --- /dev/null +++ b/gcc/testsuite/g++.dg/lto/pr65549_0.C @@ -0,0 +1,144 @@ +// { dg-lto-do link } +// { dg-lto-options { { -std=gnu++14 -flto -g } { -std=gnu++14 -flto -g -O2 -fno-inline -flto-partition=max } } } +// { dg-extra-ld-options "-r -nostdlib" } + +namespace std { +inline namespace __cxx11 {} +template struct integral_constant { + static constexpr _Tp value = 0; +}; +template struct __and_; +struct is_member_object_pointer : integral_constant {}; +template +struct is_member_function_pointer : integral_constant {}; +template struct remove_reference { typedef int type; }; +template class C; +template struct __result_of_impl; +template +struct __result_of_impl { + typedef decltype(0) type; +}; +template +struct C<_Functor(_ArgTypes...)> + : __result_of_impl::type>::value, + _Functor> {}; +template using result_of_t = typename C<_Tp>::type; +template void forward(); +template _Tp move(_Tp) {} +namespace __cxx11 { +class basic_string typedef string; +} +template struct allocator_traits { typedef decltype(0) pointer; }; +} +struct F : std::allocator_traits {}; +namespace std { +namespace __cxx11 { +class basic_string { +public: + struct _Alloc_hider : F { + _Alloc_hider(pointer); + } _M_dataplus; + basic_string(int) : _M_dataplus(0) {} + ~basic_string(); +}; +} +template class function; +template class _Base_manager { +protected: + static _Functor *_M_get_pointer(int) {} +}; +template class _Function_handler; +template +class _Function_handler<_Res(_ArgTypes...), _Functor> + : _Base_manager<_Functor> { +public: + static _Res _M_invoke(const int &) { + (*_Base_manager<_Functor>::_M_get_pointer(0))(); + } +}; +template using __check_func_return_type = int; +template +class function<_Res(_ArgTypes...)> { + template using _Invoke = 
decltype(0); + template + using _Callable = __and_<__check_func_return_type<_Invoke<_Functor>, _Res>>; + template using _Requires = int; + +public: + template , void>> + function(_Functor); + using _Invoker_type = _Res (*)(const int &); + _Invoker_type _M_invoker; +}; +template +template +function<_Res(_ArgTypes...)>::function(_Functor) { + _M_invoker = _Function_handler<_Res(), _Functor>::_M_invoke; +} +class unique_ptr { +public: + ~unique_ptr(); +}; +template _Tp make_unique(_Args... __args) { + _Tp(__args...); +} +} +class A { +public: + template T as(); +}; +class variables_map { +public: + A operator[](std::basic_string); +}; +class B { +public: + variables_map configuration(); + void run(int, int, std::function); +}; +class H; +struct G { + enum {} _state; +}; +class D { + G _local_state; + std::unique_ptr _task; + template void schedule(Func func) { + struct task_with_state { + task_with_state(Func func) : _func(func) {} + Func _func; + } tws = std::make_unique(std::move(func)); + } + friend H; +}; +template using futurize_t = H; +class H { + D *_promise; + template void schedule(Func func) { + G __trans_tmp_1; + struct task_with_ready_state { + task_with_ready_state(Func, G); + }; + std::make_unique(std::move(func), __trans_tmp_1); + _promise->schedule(std::move(func)); + } + template void then(Func func, Param) { + using P = D; + P pr; + schedule([ pr = std::move(pr), func, param = std::forward ]{}); + } + +public: + template futurize_t> then(Func) { + then(0, [] {}); + } +} clients; +main() { + B app; + app.run(0, 0, [&] { + auto config = app.configuration()[0].as(); + clients.then([] {}); + }); +} diff --git a/gcc/testsuite/gcc.dg/torture/pr66123.c b/gcc/testsuite/gcc.dg/torture/pr66123.c new file mode 100644 index 000000000000..848f8fedceeb --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr66123.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ + +int +test (int foo) +{ + static void *dummy[] = { &&a, &&b }; + goto *((char *) &&b - 2 * (foo < 0)); +a: +b: + 
return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr66272.c b/gcc/testsuite/gcc.dg/torture/pr66272.c new file mode 100644 index 000000000000..6f0148a45f95 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr66272.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ + +struct S +{ + int f0; + int f1; +}; + +int b; + +int main () +{ + struct S a[2] = { 0 }; + struct S d = { 0, 1 }; + for (b = 0; b < 2; b++) + { + a[b] = d; + d = a[0]; + } + if (d.f1 != 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr65518.c b/gcc/testsuite/gcc.dg/vect/pr65518.c new file mode 100644 index 000000000000..dc400c6d8234 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr65518.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +extern void abort (void); + +typedef struct giga +{ + unsigned int g[16]; +} giga; + +unsigned long __attribute__((noinline,noclone)) +addfst(giga const *gptr, int num) +{ + unsigned int retval = 0; + int i; + for (i = 0; i < num; i++) + retval += gptr[i].g[0]; + return retval; +} + +int main () +{ + struct giga g[8]; + unsigned int n = 1; + int i, j; + for (i = 0; i < 8; ++i) + for (j = 0; j < 16; ++j) + { + g[i].g[j] = n++; + __asm__ volatile (""); + } + if (addfst (g, 8) != 456) + abort (); + return 0; +} + +/* We don't want to vectorize the single-element interleaving in the way + we currently do that (without ignoring not needed vectors in the + gap between gptr[0].g[0] and gptr[1].g[0]), because that's very + sub-optimal and causes memory explosion (even though the cost model + should reject that in the end). 
*/ + +/* { dg-final { scan-tree-dump-times "vectorized 0 loops in function" 2 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr66251.c b/gcc/testsuite/gcc.dg/vect/pr66251.c new file mode 100644 index 000000000000..e39f700df9af --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr66251.c @@ -0,0 +1,79 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-require-effective-target vect_floatint_cvt } */ +/* { dg-require-effective-target vect_intfloat_cvt } */ +/* { dg-require-effective-target vect_pack_trunc } */ +/* { dg-require-effective-target vect_unpack } */ +/* { dg-require-effective-target vect_hw_misalign } */ + +#include "tree-vect.h" + +void __attribute__((noinline,noclone)) +test1(_Complex double *a, _Complex int *b, int stride, int n) +{ + int i; + for (i = 0; i < n; i++) + { + a[i*stride] = b[i*stride]; + } +} + +void __attribute__((noinline,noclone)) +test2(_Complex int *a, _Complex double *b, int stride, int n) +{ + int i; + for (i = 0; i < n; i++) + { + a[i*stride] = b[i*stride]; + } +} + +_Complex int ia[256]; +_Complex double da[256]; + +extern void abort (void); + +int main () +{ + int i; + int stride; + + check_vect (); + + for (stride = 1; stride < 15; stride++) + { + for (i = 0; i < 256; i++) + { + __real__ ia[i] = (i + stride) % 19; + __imag__ ia[i] = (i + stride) % 23; + __asm__ volatile (""); + } + + test1(da, ia, stride, 256/stride); + + for (i = 0; i < 256/stride; i++) + { + if (da[i*stride] != ia[i*stride]) + abort (); + } + + for (i = 0; i < 256; i++) + { + __real__ da[i] = (i + stride + 1) % 29; + __imag__ da[i] = (i + stride + 1) % 31; + __asm__ volatile (""); + } + + test2(ia, da, stride, 256/stride); + + for (i = 0; i < 256/stride; i++) + { + if (da[i*stride] != ia[i*stride]) + abort (); + } + } + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } 
*/ diff --git a/gcc/testsuite/gfortran.fortran-torture/compile/pr66251.f90 b/gcc/testsuite/gfortran.fortran-torture/compile/pr66251.f90 new file mode 100644 index 000000000000..f5b04bc8391d --- /dev/null +++ b/gcc/testsuite/gfortran.fortran-torture/compile/pr66251.f90 @@ -0,0 +1,7 @@ +SUBROUTINE dbcsr_data_convert (n) + COMPLEX(KIND=4), DIMENSION(:), POINTER :: s_data_c + COMPLEX(KIND=8), DIMENSION(:), POINTER :: t_data_z + t_data_z(1:n) = CMPLX(s_data_c(1:n), KIND=8) + CALL foo() +END SUBROUTINE dbcsr_data_convert + diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 0099f5ed2859..7def7f0cb3ba 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1003,6 +1003,7 @@ dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop) ref = fold_build2_loc (EXPR_LOCATION (ref), MEM_REF, TREE_TYPE (ref), base, memoff); + DR_UNCONSTRAINED_BASE (dr) = true; access_fns.safe_push (access_fn); } } @@ -1414,7 +1415,8 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, offset/overlap based analysis but have to rely on points-to information only. */ if (TREE_CODE (addr_a) == MEM_REF - && TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME) + && (DR_UNCONSTRAINED_BASE (a) + || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)) { /* For true dependences we can apply TBAA. */ if (flag_strict_aliasing @@ -1430,7 +1432,8 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, build_fold_addr_expr (addr_b)); } else if (TREE_CODE (addr_b) == MEM_REF - && TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME) + && (DR_UNCONSTRAINED_BASE (b) + || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)) { /* For true dependences we can apply TBAA. */ if (flag_strict_aliasing diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index f4f6832af110..9d819e4a0ffb 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -81,6 +81,10 @@ struct indices /* A list of chrecs. Access functions of the indices. 
*/ vec access_fns; + + /* Whether BASE_OBJECT is an access representing the whole object + or whether the access could not be constrained. */ + bool unconstrained_base; }; struct dr_alias @@ -191,6 +195,7 @@ struct data_reference #define DR_STMT(DR) (DR)->stmt #define DR_REF(DR) (DR)->ref #define DR_BASE_OBJECT(DR) (DR)->indices.base_object +#define DR_UNCONSTRAINED_BASE(DR) (DR)->indices.unconstrained_base #define DR_ACCESS_FNS(DR) (DR)->indices.access_fns #define DR_ACCESS_FN(DR, I) DR_ACCESS_FNS (DR)[I] #define DR_NUM_DIMENSIONS(DR) DR_ACCESS_FNS (DR).length () diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c index 91253dc05921..8ef0920cc779 100644 --- a/gcc/tree-ssa-dom.c +++ b/gcc/tree-ssa-dom.c @@ -2863,6 +2863,9 @@ propagate_rhs_into_lhs (gimple stmt, tree lhs, tree rhs, bitmap interesting_name { basic_block bb = gimple_bb (use_stmt); edge te = find_taken_edge (bb, val); + if (!te) + continue; + edge_iterator ei; edge e; gimple_stmt_iterator gsi, psi; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 7c4575d391fc..798bcaf15cc5 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -3314,15 +3314,13 @@ vect_create_vectorized_demotion_stmts (vec *vec_oprnds, (or in STMT_VINFO_RELATED_STMT chain). 
*/ if (slp_node) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + + if (!*prev_stmt_info) + STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; else - { - if (!*prev_stmt_info) - STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; - else - STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt; + STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt; - *prev_stmt_info = vinfo_for_stmt (new_stmt); - } + *prev_stmt_info = vinfo_for_stmt (new_stmt); } } @@ -3903,14 +3901,12 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, if (slp_node) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + + if (!prev_stmt_info) + STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; else - { - if (!prev_stmt_info) - STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; - else - STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; - prev_stmt_info = vinfo_for_stmt (new_stmt); - } + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); } } @@ -5715,6 +5711,22 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info)); first_stmt = GROUP_FIRST_ELEMENT (stmt_info); + + /* If this is single-element interleaving with an element distance + that leaves unused vector loads around punt - we at least create + very sub-optimal code in that case (and blow up memory, + see PR65518). */ + if (first_stmt == stmt + && !GROUP_NEXT_ELEMENT (stmt_info) + && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "single-element interleaving not supported " + "for not adjacent vector loads\n"); + return false; + } + if (!slp && !PURE_SLP_STMT (stmt_info)) { group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); -- 2.47.2