From: Richard Biener Date: Thu, 21 Jun 2018 11:18:50 +0000 (+0000) Subject: backport: [multiple changes] X-Git-Tag: releases/gcc-6.5.0~262 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fcaba42fe9139375327a1e74884ff6dbbb4a26c7;p=thirdparty%2Fgcc.git backport: [multiple changes] 2018-06-21 Richard Biener Backport from mainline 2017-09-06 Richard Biener PR tree-optimization/82108 * tree-vect-stmts.c (vectorizable_load): Fix pointer adjustment for gap in the non-permutation SLP case. * gcc.dg/vect/pr82108.c: New testcase. 2017-06-18 Richard Biener PR tree-optimization/81410 * tree-vect-stmts.c (vectorizable_load): Properly adjust for the gap in the ! slp_perm SLP case after each group. * gcc.dg/vect/pr81410.c: New testcase. 2017-03-08 Richard Biener PR tree-optimization/79920 * tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline with ncopies == 1 to ... (vect_transform_slp_perm_load): ... here. Properly compute all element loads by iterating VF times over the group. Do not handle ncopies (computed in a broken way) in vect_create_mask_and_perm. * gcc.dg/vect/pr79920.c: New testcase. From-SVN: r261842 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3ac7879f610b..0a6094e4a63f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2018-06-21 Richard Biener + + Backport from mainline + 2017-09-06 Richard Biener + + PR tree-optimization/82108 + * tree-vect-stmts.c (vectorizable_load): Fix pointer adjustment + for gap in the non-permutation SLP case. + + 2017-06-18 Richard Biener + + PR tree-optimization/81410 + * tree-vect-stmts.c (vectorizable_load): Properly adjust for + the gap in the ! slp_perm SLP case after each group. + + 2017-03-08 Richard Biener + + PR tree-optimization/79920 + * tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline + with ncopies == 1 to ... + (vect_transform_slp_perm_load): ... here. Properly compute + all element loads by iterating VF times over the group. Do + not handle ncopies (computed in a broken way) in + vect_create_mask_and_perm. + 2018-06-21 Richard Biener Backport from mainline diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 071b121c73c4..19a35ad13deb 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,21 @@ +2018-06-21 Richard Biener + + Backport from mainline + 2017-09-06 Richard Biener + + PR tree-optimization/82108 + * gcc.dg/vect/pr82108.c: New testcase. + + 2017-06-18 Richard Biener + + PR tree-optimization/81410 + * gcc.dg/vect/pr81410.c: New testcase. + + 2017-03-08 Richard Biener + + PR tree-optimization/79920 + * gcc.dg/vect/pr79920.c: New testcase. + 2018-06-21 Richard Biener Backport from mainline diff --git a/gcc/testsuite/gcc.dg/vect/pr79920.c b/gcc/testsuite/gcc.dg/vect/pr79920.c new file mode 100644 index 000000000000..c066b91e73f2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr79920.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3" } */ + +#include "tree-vect.h" + +double __attribute__((noinline,noclone)) +compute_integral (double w_1[18]) +{ + double A = 0; + double t33[2][6] = {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}; + double t43[2] = {0.0, 0.0}; + double t31[2][2] = {{1.0, 1.0}, {1.0, 1.0}}; + double t32[2][3] = {{0.0, 0.0, 1.0}, {0.0, 0.0, 1.0}}; + + for (int ip_1 = 0; ip_1 < 2; ++ip_1) + { + for (int i_0 = 0; i_0 < 6; ++i_0) + t33[ip_1][i_0] = ((w_1[i_0*3] * t32[ip_1][0]) + + (w_1[i_0*3+2] * t32[ip_1][2])); + t43[ip_1] = 2.0; + } + for (int i_0 = 0; i_0 < 6; ++i_0) + A += t43[1]*t33[1][i_0]; + return A; +} + +int main() +{ + check_vect (); + + double w_1[18] = {0., 1.0, 1.0, + 0., 1.0, 1.0, + 0., 1.0, 1.0, + 0., 1.0, 1.0, + 0., 1.0, 1.0, + 0., 1.0, 1.0}; + double A = compute_integral(w_1); + if (A != 12.0) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_double && { vect_perm && vect_hw_misalign } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr81410.c b/gcc/testsuite/gcc.dg/vect/pr81410.c new file mode 100644 index 000000000000..8d1bd6c8efda --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr81410.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_long_long } */ + +#include "tree-vect.h" + +long long x[24]; +long long y[16]; +long long z[8]; + +void __attribute__((noinline)) foo() +{ + for (int i = 0; i < 8; ++i) + { + y[2*i] = x[3*i]; + y[2*i + 1] = x[3*i + 1]; + z[i] = 1; + } +} + +int main() +{ + check_vect (); + + for (int i = 0; i < 24; ++i) + { + x[i] = i; + __asm__ volatile ("" : : : "memory"); + } + foo (); + for (int i = 0; i < 8; ++i) + if (y[2*i] != 3*i || y[2*i+1] != 3*i + 1) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr82108.c b/gcc/testsuite/gcc.dg/vect/pr82108.c new file mode 100644 index 000000000000..5b8faf1aefbd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr82108.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_float } */ + +#include "tree-vect.h" + +void __attribute__((noinline,noclone)) +downscale_2 (const float* src, int src_n, float* dst) +{ + int i; + + for (i = 0; i < src_n; i += 2) { + const float* a = src; + const float* b = src + 4; + + dst[0] = (a[0] + b[0]) / 2; + dst[1] = (a[1] + b[1]) / 2; + dst[2] = (a[2] + b[2]) / 2; + dst[3] = (a[3] + b[3]) / 2; + + src += 2 * 4; + dst += 4; + } +} + +int main () +{ + const float in[4 * 4] = { + 1, 2, 3, 4, + 5, 6, 7, 8, + + 1, 2, 3, 4, + 5, 6, 7, 8 + }; + float out[2 * 4]; + + check_vect (); + + downscale_2 (in, 4, out); + + if (out[0] != 3 || out[1] != 4 || out[2] != 5 || out[3] != 6 + || out[4] != 3 || out[5] != 4 || out[6] != 5 || out[7] != 6) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index b6a6fde40fbe..2dce445ced0d 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3288,66 +3288,6 @@ vect_get_slp_defs (vec ops, slp_tree slp_node, } } - -/* Create NCOPIES permutation statements using the mask MASK_BYTES (by - building a vector of type MASK_TYPE from it) and two input vectors placed in - DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and - shifting by STRIDE elements of DR_CHAIN for every copy. - (STRIDE is the number of vectorized stmts for NODE divided by the number of - copies). - VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where - the created stmts must be inserted. */ - -static inline void -vect_create_mask_and_perm (gimple *stmt, - tree mask, int first_vec_indx, int second_vec_indx, - gimple_stmt_iterator *gsi, slp_tree node, - tree vectype, vec dr_chain, - int ncopies, int vect_stmts_counter) -{ - tree perm_dest; - gimple *perm_stmt = NULL; - int i, stride_in, stride_out; - tree first_vec, second_vec, data_ref; - - stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies; - stride_in = dr_chain.length () / ncopies; - - /* Initialize the vect stmts of NODE to properly insert the generated - stmts later. */ - for (i = SLP_TREE_VEC_STMTS (node).length (); - i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++) - SLP_TREE_VEC_STMTS (node).quick_push (NULL); - - perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype); - for (i = 0; i < ncopies; i++) - { - first_vec = dr_chain[first_vec_indx]; - second_vec = dr_chain[second_vec_indx]; - - /* Generate the permute statement if necessary. */ - if (mask) - { - perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR, - first_vec, second_vec, mask); - data_ref = make_ssa_name (perm_dest, perm_stmt); - gimple_set_lhs (perm_stmt, data_ref); - vect_finish_stmt_generation (stmt, perm_stmt, gsi); - } - else - /* If mask was NULL_TREE generate the requested identity transform. */ - perm_stmt = SSA_NAME_DEF_STMT (first_vec); - - /* Store the vector statement in NODE. */ - SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter] - = perm_stmt; - - first_vec_indx += stride_in; - second_vec_indx += stride_in; - } -} - - /* Generate vector permute statements from a list of loads in DR_CHAIN. If ANALYZE_ONLY is TRUE, only check that it is possible to create valid permute statements for the SLP node NODE of the SLP instance @@ -3364,7 +3304,7 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, int nunits, vec_index = 0; tree vectype = STMT_VINFO_VECTYPE (stmt_info); int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); - int unroll_factor, mask_element, ncopies; + int mask_element; unsigned char *mask; machine_mode mode; @@ -3382,11 +3322,13 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, mask_type = get_vectype_for_scalar_type (mask_element_type); nunits = TYPE_VECTOR_SUBPARTS (vectype); mask = XALLOCAVEC (unsigned char, nunits); - unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); - /* Number of copies is determined by the final vectorization factor - relatively to SLP_NODE_INSTANCE unrolling factor. */ - ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); + /* Initialize the vect stmts of NODE to properly insert the generated + stmts later. */ + if (! analyze_only) + for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); + i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++) + SLP_TREE_VEC_STMTS (node).quick_push (NULL); /* Generate permutation masks for every NODE. Number of masks for each NODE is equal to GROUP_SIZE. @@ -3412,7 +3354,7 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, int second_vec_index = -1; bool noop_p = true; - for (int j = 0; j < unroll_factor; j++) + for (int j = 0; j < vf; j++) { for (int k = 0; k < group_size; k++) { @@ -3483,10 +3425,30 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, if (second_vec_index == -1) second_vec_index = first_vec_index; - vect_create_mask_and_perm (stmt, mask_vec, first_vec_index, - second_vec_index, - gsi, node, vectype, dr_chain, - ncopies, vect_stmts_counter++); + + /* Generate the permute statement if necessary. */ + tree first_vec = dr_chain[first_vec_index]; + tree second_vec = dr_chain[second_vec_index]; + gimple *perm_stmt; + if (! noop_p) + { + tree perm_dest + = vect_create_destination_var (gimple_assign_lhs (stmt), + vectype); + perm_dest = make_ssa_name (perm_dest); + perm_stmt = gimple_build_assign (perm_dest, + VEC_PERM_EXPR, + first_vec, second_vec, + mask_vec); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + } + else + /* If mask was NULL_TREE generate the requested + identity transform. */ + perm_stmt = SSA_NAME_DEF_STMT (first_vec); + + /* Store the vector statement in NODE. */ + SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt; } index = 0; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 9ac7cf3a2ec0..dae3766dce62 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6937,10 +6937,16 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, not only the number of vector stmts the permutation result fits in. */ if (slp_perm) - vec_num = (group_size * vf + nunits - 1) / nunits; + { + vec_num = (group_size * vf + nunits - 1) / nunits; + group_gap_adj = vf * group_size - nunits * vec_num; + } else - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - group_gap_adj = vf * group_size - nunits * vec_num; + { + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + group_gap_adj + = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance); + } } else vec_num = group_size; @@ -7101,6 +7107,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, aggr_type = vectype; prev_stmt_info = NULL; + int group_elt = 0; for (j = 0; j < ncopies; j++) { /* 1. Create the vector or array pointer update chain. */ @@ -7392,10 +7399,27 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, /* Store vector loads in the corresponding SLP_NODE. */ if (slp && !slp_perm) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + + /* With SLP permutation we load the gaps as well, without + we need to skip the gaps after we manage to fully load + all elements. group_gap_adj is GROUP_SIZE here. */ + group_elt += nunits; + if (group_gap_adj != 0 && ! slp_perm + && group_elt == group_size - group_gap_adj) + { + bool ovf; + tree bump + = wide_int_to_tree (sizetype, + wi::smul (TYPE_SIZE_UNIT (elem_type), + group_gap_adj, &ovf)); + dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, + stmt, bump); + group_elt = 0; + } } /* Bump the vector pointer to account for a gap or for excess elements loaded for a permuted SLP load. */ - if (group_gap_adj != 0) + if (group_gap_adj != 0 && slp_perm) { bool ovf; tree bump