From 2d8ea3a0a6095a56b7c59c50b1068d602cde934a Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Mon, 11 Nov 2019 19:43:52 +0000
Subject: [PATCH] Fix SLP downward group access classification [PR92420]

This PR was caused by the SLP handling in get_group_load_store_type
returning VMAT_CONTIGUOUS rather than VMAT_CONTIGUOUS_REVERSE for
downward groups.

A more elaborate fix would be to try to combine the reverse permutation
into SLP_TREE_LOAD_PERMUTATION for loads, but that's really a follow-on
optimisation and not backport material.  It might also not necessarily
be a win, if the target supports (say) reversing and odd/even swaps
as independent permutes but doesn't recognise the combined form.

2020-02-18  Richard Sandiford

gcc/
	Backport from mainline
	2019-11-11  Richard Sandiford

	PR tree-optimization/92420
	* tree-vect-stmts.c (get_negative_load_store_type): Move further
	up file.
	(get_group_load_store_type): Use it for reversed SLP accesses.

gcc/testsuite/
	PR tree-optimization/92420
	* gcc.dg/vect/pr92420.c: New test.
---
 gcc/ChangeLog                       |  10 +++
 gcc/testsuite/ChangeLog             |   5 ++
 gcc/testsuite/gcc.dg/vect/pr92420.c |  48 ++++++++++++
 gcc/tree-vect-stmts.c               | 110 +++++++++++++++-------------
 4 files changed, 122 insertions(+), 51 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr92420.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 967a9fb4539e..8de081487299 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2020-02-18  Richard Sandiford
+
+	Backport from mainline
+	2019-11-11  Richard Sandiford
+
+	PR tree-optimization/92420
+	* tree-vect-stmts.c (get_negative_load_store_type): Move further
+	up file.
+	(get_group_load_store_type): Use it for reversed SLP accesses.
+
 2020-02-18  Richard Sandiford
 
 	Backport from mainline
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 179c18da199f..3ec4cf5addb1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-02-18  Richard Sandiford
+
+	PR tree-optimization/92420
+	* gcc.dg/vect/pr92420.c: New test.
+
 2020-02-14  Hongtao Liu
 
 	* gcc.target/i386/avx512vbmi2-vpshld-1.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/pr92420.c b/gcc/testsuite/gcc.dg/vect/pr92420.c
new file mode 100644
index 000000000000..e43539fbbd72
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr92420.c
@@ -0,0 +1,48 @@
+/* { dg-additional-options "-mavx2" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#define N 16
+struct C { int r, i; };
+struct C a[N], b[N], c[N], d[N], e[N];
+
+__attribute__((noipa)) static void
+foo (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
+{
+  int i;
+  for (int i = 0; i < w; i++)
+    {
+      z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i;
+      z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i;
+    }
+}
+
+__attribute__((noipa)) static void
+bar (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
+{
+  int i;
+  for (int i = 0; i < w; i++)
+    {
+      z[i].r = x[i].r * y[i].r - x[i].i * y[i].i;
+      z[i].i = x[i].i * y[i].r + x[i].r * y[i].i;
+    }
+}
+
+int
+main ()
+{
+  check_vect ();
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      a[i].r = N - i; a[i].i = i - N;
+      b[i].r = i - N; b[i].i = i + N;
+      c[i].r = -1 - i; c[i].i = 2 * N - 1 - i;
+    }
+  foo (a, b + N, d, N);
+  bar (a, c, e, N);
+  for (i = 0; i < N; ++i)
+    if (d[i].r != e[i].r || d[i].i != e[i].i)
+      __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index baabdb03b5d8..8fd7af198175 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2166,6 +2166,56 @@ perm_mask_for_reverse (tree vectype)
   return vect_gen_perm_mask_checked (vectype, indices);
 }
 
+/* A subroutine of get_load_store_type, with a subset of the same
+   arguments.  Handle the case where STMT_INFO is a load or store that
+   accesses consecutive elements with a negative step.  */
+
+static vect_memory_access_type
+get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
+			      vec_load_store_type vls_type,
+			      unsigned int ncopies)
+{
+  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
+  dr_alignment_support alignment_support_scheme;
+
+  if (ncopies > 1)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "multiple types with negative step.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
+  if (alignment_support_scheme != dr_aligned
+      && alignment_support_scheme != dr_unaligned_supported)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "negative step but alignment required.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  if (vls_type == VLS_STORE_INVARIANT)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "negative step with invariant source;"
+			 " no permute needed.\n");
+      return VMAT_CONTIGUOUS_DOWN;
+    }
+
+  if (!perm_mask_for_reverse (vectype))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "negative step and reversing not supported.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  return VMAT_CONTIGUOUS_REVERSE;
+}
+
 /* STMT_INFO is either a masked or unconditional store.  Return the
    value being stored.  */
@@ -2268,7 +2318,15 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 				 "Peeling for outer loop is not supported\n");
 	      return false;
 	    }
-	  *memory_access_type = VMAT_CONTIGUOUS;
+	  int cmp = compare_step_with_zero (stmt_info);
+	  if (cmp < 0)
+	    *memory_access_type = get_negative_load_store_type
+	      (stmt_info, vectype, vls_type, 1);
+	  else
+	    {
+	      gcc_assert (!loop_vinfo || cmp > 0);
+	      *memory_access_type = VMAT_CONTIGUOUS;
+	    }
 	}
     }
   else
@@ -2370,56 +2428,6 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
   return true;
 }
 
-/* A subroutine of get_load_store_type, with a subset of the same
-   arguments.  Handle the case where STMT_INFO is a load or store that
-   accesses consecutive elements with a negative step.  */
-
-static vect_memory_access_type
-get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
-			      vec_load_store_type vls_type,
-			      unsigned int ncopies)
-{
-  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
-  dr_alignment_support alignment_support_scheme;
-
-  if (ncopies > 1)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "multiple types with negative step.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
-  if (alignment_support_scheme != dr_aligned
-      && alignment_support_scheme != dr_unaligned_supported)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "negative step but alignment required.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  if (vls_type == VLS_STORE_INVARIANT)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_NOTE, vect_location,
-			 "negative step with invariant source;"
-			 " no permute needed.\n");
-      return VMAT_CONTIGUOUS_DOWN;
-    }
-
-  if (!perm_mask_for_reverse (vectype))
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "negative step and reversing not supported.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  return VMAT_CONTIGUOUS_REVERSE;
-}
-
 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
    if there is a memory access type that the vectorized form can use,
    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
-- 
2.47.2
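
Postscript (editor's note, not part of the patch): a sketch of what the
classification change means.  In the new testcase, foo reads y[-1 - i].r
and y[-1 - i].i, so the two-element SLP group walks downward through
memory (negative step).  With the fix, such a group is classified
VMAT_CONTIGUOUS_REVERSE: a contiguous vector load followed by the
whole-vector reverse permute whose availability perm_mask_for_reverse
checks.  The C below models that with GCC's generic vector extensions;
load_group_reversed is a hypothetical helper name used only for
illustration, not something the patch introduces.

#include <stdio.h>

typedef int v4si __attribute__ ((vector_size (16)));

/* Model of VMAT_CONTIGUOUS_REVERSE for a downward access
   p[-1], p[-2], p[-3], p[-4]: one contiguous load of the four
   elements below P, then a reverse permute of all lanes.  */
static v4si
load_group_reversed (const int *p)
{
  v4si v;
  __builtin_memcpy (&v, p - 4, sizeof v);		/* contiguous load */
  return __builtin_shuffle (v, (v4si) { 3, 2, 1, 0 });	/* reverse lanes */
}

int
main (void)
{
  int buf[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  v4si v = load_group_reversed (buf + 8);	/* loads buf[4..7], reversed */
  printf ("%d %d %d %d\n", v[0], v[1], v[2], v[3]);	/* prints: 7 6 5 4 */
  return 0;
}

Note that for the grouped r/i accesses in the testcase, reversing the
whole vector leaves the two fields swapped within each pair, so an
SLP load permutation still reorders lanes afterwards; folding that
reverse into SLP_TREE_LOAD_PERMUTATION is exactly the follow-on
optimisation the commit message mentions and deliberately leaves out
of the backport.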