#include "backend.h"
#include "tree-data-ref.h"
#include "tree-ssa-loop-niter.h"
+#include "tree-hash-traits.h"
/* This file should be included last. */
#include "riscv-vector-costs.h"
/* Adjust vectorization cost after calling riscv_builtin_vectorization_cost.
   For some statements, we would like to further fine-tune the cost on
   top of riscv_builtin_vectorization_cost handling, which doesn't have any
   information on statement operation codes etc.  */
-static unsigned
-adjust_stmt_cost (enum vect_cost_for_stmt kind, tree vectype, int stmt_cost)
+unsigned
+costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
+			 stmt_vec_info stmt_info,
+			 slp_tree, tree vectype, int stmt_cost)
 {
   const cpu_vector_cost *costs = get_vector_costs ();
   switch (kind)
     {
     case scalar_to_vec:
-      return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
-			   : costs->regmove->GR2VR);
+      stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
+		    : costs->regmove->GR2VR);
+      break;
     case vec_to_scalar:
-      return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
-			   : costs->regmove->VR2GR);
+      stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
+		    : costs->regmove->VR2GR);
+      break;
+    case vector_load:
+    case vector_store:
+      {
+	/* Unlike scalar loads and stores, unit-stride vector loads and
+	   stores do not have offset addressing: if the address depends
+	   on a variable we need an additional add/sub for each
+	   load/store in the worst case.  */
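+	/* For example, a scalar store can fold a constant offset into
+	   its addressing mode (sd t1, 8(a1)) while a unit-stride vector
+	   store only takes a plain address register, so the offset needs
+	   a separate add (addi t0, a1, 8 then vse64.v v1, (t0)).
+	   Illustrative codegen, not an exact dump.  */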
+	if (stmt_info && stmt_info->stmt)
+	  {
+	    data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+	    class loop *father = stmt_info->stmt->bb->loop_father;
+	    if (!loop && father && !father->inner && father->superloops)
+	      {
+		tree ref;
+		if (TREE_CODE (dr->ref) != MEM_REF
+		    || !(ref = TREE_OPERAND (dr->ref, 0))
+		    || TREE_CODE (ref) != SSA_NAME)
+		  break;
+
+		if (SSA_NAME_IS_DEFAULT_DEF (ref))
+		  break;
+
+		if (memrefs.contains ({ref, cst0}))
+		  break;
+
+		memrefs.add ({ref, cst0});
+
+		/* In case we have not seen REF before and the base address
+		   is a pointer operation, try a bit harder.  */
+		tree base = DR_BASE_ADDRESS (dr);
+		if (TREE_CODE (base) == POINTER_PLUS_EXPR
+		    || TREE_CODE (base) == POINTER_DIFF_EXPR)
+		  {
+		    /* Deconstruct BASE's second operand.  If it is a
+		       binary operation, i.e. a base and an "offset",
+		       store this pair.  Only increase the stmt_cost
+		       if we haven't seen it before.  */
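+		    /* E.g. BASE might have the shape
+		       p_1(D) + ((sizetype) i_2 * 8 + 16), in which case
+		       the recorded pair would be the offset's operands,
+		       roughly {i_2 * 8, 16} after stripping conversions.
+		       An illustrative tree shape, not an exact dump.  */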
+		    tree argp = TREE_OPERAND (base, 1);
+		    typedef std::pair<tree, tree> addr_pair;
+		    addr_pair pair;
+		    if (TREE_CODE_CLASS (TREE_CODE (argp)) == tcc_binary)
+		      {
+			tree argp0 = tree_strip_nop_conversions
+			  (TREE_OPERAND (argp, 0));
+			tree argp1 = TREE_OPERAND (argp, 1);
+			pair = addr_pair (argp0, argp1);
+			if (memrefs.contains (pair))
+			  break;
+
+			memrefs.add (pair);
+			stmt_cost += builtin_vectorization_cost (scalar_stmt,
+								 NULL_TREE, 0);
+		      }
+		  }
+	      }
+	  }
+	break;
+      }
+
     default:
       break;
     }
   return stmt_cost;
 }

unsigned
costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
-		      stmt_vec_info stmt_info, slp_tree, tree vectype,
+		      stmt_vec_info stmt_info, slp_tree node, tree vectype,
		      int misalign, vect_cost_model_location where)
{
   int stmt_cost
     = targetm.vectorize.builtin_vectorization_cost (kind, vectype, misalign);

   /* Do one-time initialization based on the vinfo.  */
   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
   if (!m_analyzed_vinfo)
     {
       if (loop_vinfo)
	 analyze_loop_vinfo (loop_vinfo);
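+
+      /* Costing runs once per vinfo; start with an empty set of seen
+	 memory references.  */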
+      memrefs.empty ();
       m_analyzed_vinfo = true;
     }
   if (stmt_info)
     {
       /* If we're applying the VLA vs. VLS unrolling heuristic, estimate
	  the number of statements in the unrolled VLS loop.  For
	  simplicity, we assume one iteration of the VLS loop needs the
	  same number of statements as one iteration of the VLA loop.  */
       if (where == vect_body && m_unrolled_vls_niters)
	 m_unrolled_vls_stmts += count * m_unrolled_vls_niters;
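+      /* E.g. with an estimated m_unrolled_vls_niters of 4 and COUNT == 2
+	 this accounts 2 * 4 == 8 unrolled VLS statements.  */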
-
-      if (vectype)
-	stmt_cost = adjust_stmt_cost (kind, vectype, stmt_cost);
     }
+  if (vectype)
+    stmt_cost = adjust_stmt_cost (kind, loop_vinfo, stmt_info, node, vectype,
+				  stmt_cost);
+
   return record_stmt_cost (stmt_info, where, count * stmt_cost);
}
   unsigned HOST_WIDE_INT m_unrolled_vls_niters = 0;
   unsigned HOST_WIDE_INT m_unrolled_vls_stmts = 0;
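+
+  /* Zero constant used as the offset half of a MEMREFS key when the
+     address is a plain SSA name rather than a base/offset pair.  */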
+  tree cst0 = build_int_cst (integer_type_node, 0);
+
+  /* Store the memory references already processed.  */
+  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
+  hash_set <tree_pair_hash> memrefs;
+
   void analyze_loop_vinfo (loop_vec_info);
   void record_potential_vls_unrolling (loop_vec_info);
   bool prefer_unrolled_loop () const;
   void record_potential_unexpected_spills (loop_vec_info);
   void adjust_vect_cost_per_loop (loop_vec_info);
+  unsigned adjust_stmt_cost (enum vect_cost_for_stmt kind,
+			     loop_vec_info,
+			     stmt_vec_info stmt_info, slp_tree,
+			     tree vectype, int stmt_cost);
};
} // namespace riscv_vector
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-tree-slp1-details" } */
+
+#define f1 (1.0 / 3.0)
+#define f2 (1.0 / 18.0)
+#define f3 (1.0 / 36.0)
+
+#define SIZE_X 10
+#define SIZE_Y 10
+#define SIZE_Z 10
+
+typedef enum {C = 0,
+ N, S, E, W, T, B,
+ NE, NW, SE, SW,
+ NT, NB, ST, SB,
+ ET, EB, WT, WB,
+ FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
+
+#define CALC_INDEX(x,y,z,e) ((e)+N_CELL_ENTRIES*((x)+ \
+ (y)*SIZE_X+(z)*SIZE_X*SIZE_Y))
+#define GRID_ENTRY_SWEEP(g,dx,dy,dz,e) ((g)[CALC_INDEX(dx, dy, dz, e)+(i)])
+#define LOCAL(g,e) (GRID_ENTRY_SWEEP (g, 0, 0, 0, e))
+
+void foo (double *grid)
+{
+  for (int i = CALC_INDEX(0, 0, -2, 0);
+       i < CALC_INDEX(0, 0, SIZE_Z + 2, 0);
+       i += N_CELL_ENTRIES)
+    {
+      LOCAL (grid, C ) = f1;
+      LOCAL (grid, N ) = f2;
+      LOCAL (grid, S ) = f2;
+      LOCAL (grid, E ) = f2;
+      LOCAL (grid, W ) = f2;
+      LOCAL (grid, T ) = f2;
+      LOCAL (grid, B ) = f2;
+      LOCAL (grid, NE) = f3;
+      LOCAL (grid, NW) = f3;
+      LOCAL (grid, SE) = f3;
+      LOCAL (grid, SW) = f3;
+      LOCAL (grid, NT) = f3;
+      LOCAL (grid, NB) = f3;
+      LOCAL (grid, ST) = f3;
+      LOCAL (grid, SB) = f3;
+      LOCAL (grid, ET) = f3;
+      LOCAL (grid, EB) = f3;
+      LOCAL (grid, WT) = f3;
+      LOCAL (grid, WB) = f3;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized using SLP" 0 "slp1" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-tree-slp1-details" } */
+
+#define f1 3
+#define f2 4
+#define f3 5
+
+#define SIZE_X 10
+#define SIZE_Y 10
+#define SIZE_Z 10
+
+typedef enum {C = 0,
+ N, S, E, W, T, B,
+ NE, NW, SE, SW,
+ NT, NB, ST, SB,
+ ET, EB, WT, WB,
+ FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
+
+#define CALC_INDEX(x,y,z,e) ((e)+N_CELL_ENTRIES*((x)+ \
+ (y)*SIZE_X+(z)*SIZE_X*SIZE_Y))
+#define GRID_ENTRY_SWEEP(g,dx,dy,dz,e) ((g)[CALC_INDEX(dx, dy, dz, e)+(i)])
+#define LOCAL(g,e) (GRID_ENTRY_SWEEP (g, 0, 0, 0, e))
+
+void foo (unsigned long *grid)
+{
+  for (int i = CALC_INDEX(0, 0, -2, 0);
+       i < CALC_INDEX(0, 0, SIZE_Z + 2, 0);
+       i += N_CELL_ENTRIES)
+    {
+      LOCAL (grid, C ) = f1;
+      LOCAL (grid, N ) = f2;
+      LOCAL (grid, S ) = f2;
+      LOCAL (grid, E ) = f2;
+      LOCAL (grid, W ) = f2;
+      LOCAL (grid, T ) = f2;
+      LOCAL (grid, B ) = f2;
+      LOCAL (grid, NE) = f3;
+      LOCAL (grid, NW) = f3;
+      LOCAL (grid, SE) = f3;
+      LOCAL (grid, SW) = f3;
+      LOCAL (grid, NT) = f3;
+      LOCAL (grid, NB) = f3;
+      LOCAL (grid, ST) = f3;
+      LOCAL (grid, SB) = f3;
+      LOCAL (grid, ET) = f3;
+      LOCAL (grid, EB) = f3;
+      LOCAL (grid, WT) = f3;
+      LOCAL (grid, WB) = f3;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized using SLP" 0 "slp1" } } */