From 47d3fdb242c7374a6fe3e4b0981a70767bc585a2 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 24 Feb 2016 08:27:25 +0000 Subject: [PATCH] re PR tree-optimization/69907 (wrong code at -O3 on x86_64-linux-gnu) 2016-02-24 Richard Biener PR tree-optimization/69907 * tree-vect-stmts.c (vectorizable_load): Check for gaps at the end of permutations for BB vectorization. * gcc.dg/vect/bb-slp-pr69907.c: New testcase. * gcc.dg/vect/bb-slp-34.c: XFAIL. * gcc.dg/vect/bb-slp-pr68892.c: Likewise. From-SVN: r233655 --- gcc/ChangeLog | 6 ++++++ gcc/testsuite/ChangeLog | 7 +++++++ gcc/testsuite/gcc.dg/vect/bb-slp-34.c | 3 ++- gcc/testsuite/gcc.dg/vect/bb-slp-pr68892.c | 4 +++- gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c | 12 ++++++++++++ gcc/tree-vect-stmts.c | 13 +++++++++++++ 6 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 405796963771..dc87f1b5d64f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2016-02-24 Richard Biener + + PR tree-optimization/69907 + * tree-vect-stmts.c (vectorizable_load): Check for gaps at the + end of permutations for BB vectorization. + 2016-02-24 Christian Bruel * config/arm/arm-c.c (arm_option_override): Initialize diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index beb802600078..d716932e42f2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2016-02-24 Richard Biener + + PR tree-optimization/69907 + * gcc.dg/vect/bb-slp-pr69907.c: New testcase. + * gcc.dg/vect/bb-slp-34.c: XFAIL. + * gcc.dg/vect/bb-slp-pr68892.c: Likewise. + 2016-02-24 Christian Bruel * gcc.target/arm/pragma_cpp_fma.c: New test. 
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-34.c b/gcc/testsuite/gcc.dg/vect/bb-slp-34.c index c51c7706adcb..418f2b536c99 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-34.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-34.c @@ -32,4 +32,5 @@ int main() return 0; } -/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */ +/* ??? XFAILed because we access "excess" elements with the permutation. */ +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr68892.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr68892.c index ba51b76fdb92..216883fc0c46 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr68892.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr68892.c @@ -13,5 +13,7 @@ void foo(void) b[3] = a[3][0]; } -/* { dg-final { scan-tree-dump "not profitable" "slp2" } } */ +/* ??? The profitability check is not reached because we give up on the + gaps we access earlier. */ +/* { dg-final { scan-tree-dump "not profitable" "slp2" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "Basic block will be vectorized" 0 "slp2" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c new file mode 100644 index 000000000000..9f1e71a3db13 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3" } */ +/* { dg-require-effective-target vect_unpack } */ + +void foo(unsigned *p1, unsigned short *p2) +{ + int n; + for (n = 0; n < 320; n++) + p1[n] = p2[n * 2]; +} + +/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" } } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 1aade9e650c4..9678d7c1615e 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6395,6 +6395,19 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, slp_perm = 
true; group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); + + /* ??? The following is overly pessimistic (as well as the loop + case above) in the case we can statically determine the excess + elements loaded are within the bounds of a decl that is accessed. + Likewise, for BB vectorization, using masked loads is a possibility. */ + if (bb_vinfo && slp_perm && group_size % nunits != 0) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "BB vectorization with gaps at the end of a load " + "is not supported\n"); + return false; + } + if (!slp && !PURE_SLP_STMT (stmt_info) && !STMT_VINFO_STRIDED_P (stmt_info)) -- 2.47.2