vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
&vec_masks, mask_vectype);
}
+
tree vec_mask = NULL_TREE;
+ if (memory_access_type == VMAT_LOAD_STORE_LANES)
+ {
+ gcc_assert (alignment_support_scheme == dr_aligned
+ || alignment_support_scheme == dr_unaligned_supported);
+ gcc_assert (grouped_load && !slp);
+
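+ /* For VMAT_LOAD_STORE_LANES each copy is handled by a single
+ (masked) LOAD_LANES call whose array result is split into
+ VEC_NUM vectors. */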
+ unsigned int inside_cost = 0, prologue_cost = 0;
+ for (j = 0; j < ncopies; j++)
+ {
+ if (costing_p)
+ {
+ /* An IFN_LOAD_LANES will load all its vector results,
+ regardless of which ones we actually need. Account
+ for the cost of unused results. */
+ if (first_stmt_info == stmt_info)
+ {
+ unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
+ stmt_vec_info next_stmt_info = first_stmt_info;
+ do
+ {
+ gaps -= 1;
+ next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
+ }
+ while (next_stmt_info);
+ if (gaps)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_load_cost: %d "
+ "unused vectors.\n",
+ gaps);
+ vect_get_load_cost (vinfo, stmt_info, gaps,
+ alignment_support_scheme,
+ misalignment, false, &inside_cost,
+ &prologue_cost, cost_vec, cost_vec,
+ true);
+ }
+ }
+ vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
+ misalignment, false, &inside_cost,
+ &prologue_cost, cost_vec, cost_vec, true);
+ continue;
+ }
+
+ /* 1. Create the vector or array pointer update chain. */
+ if (j == 0)
+ dataref_ptr
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+ at_loop, offset, &dummy, gsi,
+ &ptr_incr, false, bump);
+ else
+ {
+ gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+ stmt_info, bump);
+ }
+ if (mask)
+ vec_mask = vec_masks[j];
+
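+ /* VEC_ARRAY is an array variable holding the VEC_NUM vectors
+ produced by the LOAD_LANES call. */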
+ tree vec_array = create_vector_array (vectype, vec_num);
+
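+ /* Combine the loop control mask (if any) with the mask of a
+ conditional load. */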
+ tree final_mask = NULL_TREE;
+ if (loop_masks)
+ final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+ ncopies, vectype, j);
+ if (vec_mask)
+ final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
+ vec_mask, gsi);
+
+ gcall *call;
+ if (final_mask)
+ {
+ /* Emit:
+ VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
+ VEC_MASK). */
+ unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
+ tree alias_ptr = build_int_cst (ref_type, align);
+ call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
+ dataref_ptr, alias_ptr,
+ final_mask);
+ }
+ else
+ {
+ /* Emit:
+ VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
+ data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
+ call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
+ }
+ gimple_call_set_lhs (call, vec_array);
+ gimple_call_set_nothrow (call, true);
+ vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+
+ dr_chain.create (vec_num);
+ /* Extract each vector into an SSA_NAME. */
+ for (i = 0; i < vec_num; i++)
+ {
+ new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
+ vec_array, i);
+ dr_chain.quick_push (new_temp);
+ }
+
+ /* Record the mapping between SSA_NAMEs and statements. */
+ vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
+
+ /* Record that VEC_ARRAY is now dead. */
+ vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
+
+ dr_chain.release ();
+
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+ }
+
+ if (costing_p && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_load_cost: inside_cost = %u, "
+ "prologue_cost = %u .\n",
+ inside_cost, prologue_cost);
+
+ return true;
+ }
+
poly_uint64 group_elt = 0;
unsigned int inside_cost = 0, prologue_cost = 0;
for (j = 0; j < ncopies; j++)
dr_chain.create (vec_num);
gimple *new_stmt = NULL;
- if (memory_access_type == VMAT_LOAD_STORE_LANES)
+ for (i = 0; i < vec_num; i++)
{
- if (costing_p)
- {
- /* An IFN_LOAD_LANES will load all its vector results,
- regardless of which ones we actually need. Account
- for the cost of unused results. */
- if (grouped_load && first_stmt_info == stmt_info)
- {
- unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
- stmt_vec_info next_stmt_info = first_stmt_info;
- do
- {
- gaps -= 1;
- next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
- }
- while (next_stmt_info);
- if (gaps)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vect_model_load_cost: %d "
- "unused vectors.\n",
- gaps);
- vect_get_load_cost (vinfo, stmt_info, gaps,
- alignment_support_scheme,
- misalignment, false, &inside_cost,
- &prologue_cost, cost_vec, cost_vec,
- true);
- }
- }
- vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
- misalignment, false, &inside_cost,
- &prologue_cost, cost_vec, cost_vec, true);
- continue;
- }
- tree vec_array;
-
- vec_array = create_vector_array (vectype, vec_num);
-
tree final_mask = NULL_TREE;
- if (loop_masks)
- final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- ncopies, vectype, j);
- if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
- final_mask, vec_mask, gsi);
-
- gcall *call;
- if (final_mask)
- {
- /* Emit:
- VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
- VEC_MASK). */
- unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
- tree alias_ptr = build_int_cst (ref_type, align);
- call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
- dataref_ptr, alias_ptr,
- final_mask);
- }
- else
+ tree final_len = NULL_TREE;
+ tree bias = NULL_TREE;
+ if (!costing_p)
{
- /* Emit:
- VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
- data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
- call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
- }
- gimple_call_set_lhs (call, vec_array);
- gimple_call_set_nothrow (call, true);
- vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
- new_stmt = call;
+ if (loop_masks)
+ final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+ vec_num * ncopies, vectype,
+ vec_num * j + i);
+ if (vec_mask)
+ final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+ final_mask, vec_mask, gsi);
- /* Extract each vector into an SSA_NAME. */
- for (i = 0; i < vec_num; i++)
- {
- new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
- vec_array, i);
- dr_chain.quick_push (new_temp);
+ if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+ gsi, stmt_info, bump);
}
- /* Record the mapping between SSA_NAMEs and statements. */
- vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
-
- /* Record that VEC_ARRAY is now dead. */
- vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
- }
- else
- {
- for (i = 0; i < vec_num; i++)
+ /* 2. Create the vector-load in the loop. */
+ switch (alignment_support_scheme)
{
- tree final_mask = NULL_TREE;
- tree final_len = NULL_TREE;
- tree bias = NULL_TREE;
- if (!costing_p)
- {
- if (loop_masks)
- final_mask
- = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- vec_num * ncopies, vectype,
- vec_num * j + i);
- if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
- final_mask, vec_mask, gsi);
-
- if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
- gsi, stmt_info, bump);
- }
+ case dr_aligned:
+ case dr_unaligned_supported:
+ {
+ unsigned int misalign;
+ unsigned HOST_WIDE_INT align;
- /* 2. Create the vector-load in the loop. */
- switch (alignment_support_scheme)
- {
- case dr_aligned:
- case dr_unaligned_supported:
+ if (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info.ifn != IFN_LAST)
{
- unsigned int misalign;
- unsigned HOST_WIDE_INT align;
-
- if (memory_access_type == VMAT_GATHER_SCATTER
- && gs_info.ifn != IFN_LAST)
+ if (costing_p)
{
- if (costing_p)
- {
- unsigned int cnunits
- = vect_nunits_for_cost (vectype);
- inside_cost
- = record_stmt_cost (cost_vec, cnunits,
- scalar_load, stmt_info, 0,
- vect_body);
- break;
- }
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vec_offset = vec_offsets[vec_num * j + i];
- tree zero = build_zero_cst (vectype);
- tree scale = size_int (gs_info.scale);
-
- if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
- {
- if (loop_lens)
- final_len
- = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num * ncopies, vectype,
- vec_num * j + i, 1);
- else
- final_len = build_int_cst (sizetype,
- TYPE_VECTOR_SUBPARTS (
- vectype));
- signed char biasval
- = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
- bias = build_int_cst (intQI_type_node, biasval);
- if (!final_mask)
- {
- mask_vectype = truth_type_for (vectype);
- final_mask = build_minus_one_cst (mask_vectype);
- }
- }
-
- gcall *call;
- if (final_len && final_mask)
- call = gimple_build_call_internal (
- IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr,
- vec_offset, scale, zero, final_mask, final_len,
- bias);
- else if (final_mask)
- call = gimple_build_call_internal
- (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
- vec_offset, scale, zero, final_mask);
- else
- call = gimple_build_call_internal
- (IFN_GATHER_LOAD, 4, dataref_ptr,
- vec_offset, scale, zero);
- gimple_call_set_nothrow (call, true);
- new_stmt = call;
- data_ref = NULL_TREE;
+ unsigned int cnunits = vect_nunits_for_cost (vectype);
+ inside_cost
+ = record_stmt_cost (cost_vec, cnunits, scalar_load,
+ stmt_info, 0, vect_body);
break;
}
- else if (memory_access_type == VMAT_GATHER_SCATTER)
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ vec_offset = vec_offsets[vec_num * j + i];
+ tree zero = build_zero_cst (vectype);
+ tree scale = size_int (gs_info.scale);
+
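+ /* MASK_LEN_GATHER_LOAD also takes length and bias operands;
+ default to the full vector length if there are no loop
+ lengths and to an all-ones mask if no mask was computed. */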
+ if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
{
- /* Emulated gather-scatter. */
- gcc_assert (!final_mask);
- unsigned HOST_WIDE_INT const_nunits
- = nunits.to_constant ();
- if (costing_p)
- {
- /* For emulated gathers N offset vector element
- offset add is consumed by the load). */
- inside_cost
- = record_stmt_cost (cost_vec, const_nunits,
- vec_to_scalar, stmt_info, 0,
- vect_body);
- /* N scalar loads plus gathering them into a
- vector. */
- inside_cost
- = record_stmt_cost (cost_vec, const_nunits,
- scalar_load, stmt_info, 0,
- vect_body);
- inside_cost
- = record_stmt_cost (cost_vec, 1, vec_construct,
- stmt_info, 0, vect_body);
- break;
- }
- unsigned HOST_WIDE_INT const_offset_nunits
- = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
- .to_constant ();
- vec<constructor_elt, va_gc> *ctor_elts;
- vec_alloc (ctor_elts, const_nunits);
- gimple_seq stmts = NULL;
- /* We support offset vectors with more elements
- than the data vector for now. */
- unsigned HOST_WIDE_INT factor
- = const_offset_nunits / const_nunits;
- vec_offset = vec_offsets[j / factor];
- unsigned elt_offset = (j % factor) * const_nunits;
- tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
- tree scale = size_int (gs_info.scale);
- align
- = get_object_alignment (DR_REF (first_dr_info->dr));
- tree ltype = build_aligned_type (TREE_TYPE (vectype),
- align);
- for (unsigned k = 0; k < const_nunits; ++k)
+ if (loop_lens)
+ final_len
+ = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+ vec_num * ncopies, vectype,
+ vec_num * j + i, 1);
+ else
+ final_len
+ = build_int_cst (sizetype,
+ TYPE_VECTOR_SUBPARTS (vectype));
+ signed char biasval
+ = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ bias = build_int_cst (intQI_type_node, biasval);
+ if (!final_mask)
{
- tree boff = size_binop (MULT_EXPR,
- TYPE_SIZE (idx_type),
- bitsize_int
- (k + elt_offset));
- tree idx = gimple_build (&stmts, BIT_FIELD_REF,
- idx_type, vec_offset,
- TYPE_SIZE (idx_type),
- boff);
- idx = gimple_convert (&stmts, sizetype, idx);
- idx = gimple_build (&stmts, MULT_EXPR,
- sizetype, idx, scale);
- tree ptr = gimple_build (&stmts, PLUS_EXPR,
- TREE_TYPE (dataref_ptr),
- dataref_ptr, idx);
- ptr = gimple_convert (&stmts, ptr_type_node, ptr);
- tree elt = make_ssa_name (TREE_TYPE (vectype));
- tree ref = build2 (MEM_REF, ltype, ptr,
- build_int_cst (ref_type, 0));
- new_stmt = gimple_build_assign (elt, ref);
- gimple_set_vuse (new_stmt,
- gimple_vuse (gsi_stmt (*gsi)));
- gimple_seq_add_stmt (&stmts, new_stmt);
- CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
+ mask_vectype = truth_type_for (vectype);
+ final_mask = build_minus_one_cst (mask_vectype);
}
- gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
- new_stmt = gimple_build_assign (NULL_TREE,
- build_constructor
- (vectype, ctor_elts));
- data_ref = NULL_TREE;
- break;
}
- if (costing_p)
- break;
-
- align =
- known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
- if (alignment_support_scheme == dr_aligned)
- misalign = 0;
- else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
- {
- align = dr_alignment
- (vect_dr_behavior (vinfo, first_dr_info));
- misalign = 0;
- }
+ gcall *call;
+ if (final_len && final_mask)
+ call = gimple_build_call_internal (
+ IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset,
+ scale, zero, final_mask, final_len, bias);
+ else if (final_mask)
+ call
+ = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
+ dataref_ptr, vec_offset,
+ scale, zero, final_mask);
else
- misalign = misalignment;
- if (dataref_offset == NULL_TREE
- && TREE_CODE (dataref_ptr) == SSA_NAME)
- set_ptr_info_alignment (get_ptr_info (dataref_ptr),
- align, misalign);
- align = least_bit_hwi (misalign | align);
-
- /* Compute IFN when LOOP_LENS or final_mask valid. */
- machine_mode vmode = TYPE_MODE (vectype);
- machine_mode new_vmode = vmode;
- internal_fn partial_ifn = IFN_LAST;
- if (loop_lens)
+ call
+ = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
+ dataref_ptr, vec_offset,
+ scale, zero);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ data_ref = NULL_TREE;
+ break;
+ }
+ else if (memory_access_type == VMAT_GATHER_SCATTER)
+ {
+ /* Emulated gather-scatter. */
+ gcc_assert (!final_mask);
+ unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
+ if (costing_p)
{
- opt_machine_mode new_ovmode
- = get_len_load_store_mode (vmode, true,
- &partial_ifn);
- new_vmode = new_ovmode.require ();
- unsigned factor = (new_ovmode == vmode)
- ? 1
- : GET_MODE_UNIT_SIZE (vmode);
- final_len
- = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num * ncopies, vectype,
- vec_num * j + i, factor);
+ /* For emulated gathers we need N offset vector element
+ extracts (the offset add is consumed by the load). */
+ inside_cost
+ = record_stmt_cost (cost_vec, const_nunits,
+ vec_to_scalar, stmt_info, 0,
+ vect_body);
+ /* N scalar loads plus gathering them into a
+ vector. */
+ inside_cost = record_stmt_cost (cost_vec, const_nunits,
+ scalar_load, stmt_info,
+ 0, vect_body);
+ inside_cost
+ = record_stmt_cost (cost_vec, 1, vec_construct,
+ stmt_info, 0, vect_body);
+ break;
}
- else if (final_mask)
+ unsigned HOST_WIDE_INT const_offset_nunits
+ = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
+ .to_constant ();
+ vec<constructor_elt, va_gc> *ctor_elts;
+ vec_alloc (ctor_elts, const_nunits);
+ gimple_seq stmts = NULL;
+ /* We support offset vectors with more elements
+ than the data vector for now. */
+ unsigned HOST_WIDE_INT factor
+ = const_offset_nunits / const_nunits;
+ vec_offset = vec_offsets[j / factor];
+ unsigned elt_offset = (j % factor) * const_nunits;
+ tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
+ tree scale = size_int (gs_info.scale);
+ align = get_object_alignment (DR_REF (first_dr_info->dr));
+ tree ltype
+ = build_aligned_type (TREE_TYPE (vectype), align);
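+ /* Emit CONST_NUNITS scalar loads from DATAREF_PTR plus the
+ scaled offset vector elements and collect the results into
+ a CONSTRUCTOR for the vector value. */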
+ for (unsigned k = 0; k < const_nunits; ++k)
{
- if (!can_vec_mask_load_store_p (
- vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
- &partial_ifn))
- gcc_unreachable ();
+ tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
+ bitsize_int (k + elt_offset));
+ tree idx = gimple_build (&stmts, BIT_FIELD_REF,
+ idx_type, vec_offset,
+ TYPE_SIZE (idx_type), boff);
+ idx = gimple_convert (&stmts, sizetype, idx);
+ idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
+ scale);
+ tree ptr = gimple_build (&stmts, PLUS_EXPR,
+ TREE_TYPE (dataref_ptr),
+ dataref_ptr, idx);
+ ptr = gimple_convert (&stmts, ptr_type_node, ptr);
+ tree elt = make_ssa_name (TREE_TYPE (vectype));
+ tree ref = build2 (MEM_REF, ltype, ptr,
+ build_int_cst (ref_type, 0));
+ new_stmt = gimple_build_assign (elt, ref);
+ gimple_set_vuse (new_stmt,
+ gimple_vuse (gsi_stmt (*gsi)));
+ gimple_seq_add_stmt (&stmts, new_stmt);
+ CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
}
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ new_stmt = gimple_build_assign (
+ NULL_TREE, build_constructor (vectype, ctor_elts));
+ data_ref = NULL_TREE;
+ break;
+ }
- if (partial_ifn == IFN_MASK_LEN_LOAD)
+ if (costing_p)
+ break;
+
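+ /* Compute the alignment we can assume for the access and
+ record it on the data reference pointer. */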
+ align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+ if (alignment_support_scheme == dr_aligned)
+ misalign = 0;
+ else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
+ {
+ align
+ = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+ misalign = 0;
+ }
+ else
+ misalign = misalignment;
+ if (dataref_offset == NULL_TREE
+ && TREE_CODE (dataref_ptr) == SSA_NAME)
+ set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
+ misalign);
+ align = least_bit_hwi (misalign | align);
+
+ /* Compute IFN when LOOP_LENS or final_mask valid. */
+ machine_mode vmode = TYPE_MODE (vectype);
+ machine_mode new_vmode = vmode;
+ internal_fn partial_ifn = IFN_LAST;
+ if (loop_lens)
+ {
+ opt_machine_mode new_ovmode
+ = get_len_load_store_mode (vmode, true, &partial_ifn);
+ new_vmode = new_ovmode.require ();
+ unsigned factor
+ = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
+ final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+ vec_num * ncopies, vectype,
+ vec_num * j + i, factor);
+ }
+ else if (final_mask)
+ {
+ if (!can_vec_mask_load_store_p (
+ vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
+ &partial_ifn))
+ gcc_unreachable ();
+ }
+
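+ /* MASK_LEN_LOAD requires both a length and a mask; supply
+ the full vector length and an all-ones mask for whichever
+ operand is missing. */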
+ if (partial_ifn == IFN_MASK_LEN_LOAD)
+ {
+ if (!final_len)
{
- if (!final_len)
- {
- /* Pass VF value to 'len' argument of
- MASK_LEN_LOAD if LOOP_LENS is invalid. */
- final_len
- = size_int (TYPE_VECTOR_SUBPARTS (vectype));
- }
- if (!final_mask)
- {
- /* Pass all ones value to 'mask' argument of
- MASK_LEN_LOAD if final_mask is invalid. */
- mask_vectype = truth_type_for (vectype);
- final_mask = build_minus_one_cst (mask_vectype);
- }
+ /* Pass VF value to 'len' argument of
+ MASK_LEN_LOAD if LOOP_LENS is invalid. */
+ final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
}
- if (final_len)
+ if (!final_mask)
{
- signed char biasval
- = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
-
- bias = build_int_cst (intQI_type_node, biasval);
+ /* Pass all ones value to 'mask' argument of
+ MASK_LEN_LOAD if final_mask is invalid. */
+ mask_vectype = truth_type_for (vectype);
+ final_mask = build_minus_one_cst (mask_vectype);
}
+ }
+ if (final_len)
+ {
+ signed char biasval
+ = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
- if (final_len)
+ bias = build_int_cst (intQI_type_node, biasval);
+ }
+
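+ /* Emit a length-controlled load, either MASK_LEN_LOAD or
+ LEN_LOAD. */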
+ if (final_len)
+ {
+ tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+ gcall *call;
+ if (partial_ifn == IFN_MASK_LEN_LOAD)
+ call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
+ dataref_ptr, ptr,
+ final_mask, final_len,
+ bias);
+ else
+ call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
+ dataref_ptr, ptr,
+ final_len, bias);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ data_ref = NULL_TREE;
+
+ /* Need conversion if it's wrapped with VnQI. */
+ if (vmode != new_vmode)
{
- tree ptr
- = build_int_cst (ref_type, align * BITS_PER_UNIT);
- gcall *call;
- if (partial_ifn == IFN_MASK_LEN_LOAD)
- call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
- 5, dataref_ptr,
- ptr, final_mask,
- final_len, bias);
- else
- call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
- dataref_ptr, ptr,
- final_len, bias);
- gimple_call_set_nothrow (call, true);
- new_stmt = call;
- data_ref = NULL_TREE;
-
- /* Need conversion if it's wrapped with VnQI. */
- if (vmode != new_vmode)
- {
- tree new_vtype = build_vector_type_for_mode (
- unsigned_intQI_type_node, new_vmode);
- tree var = vect_get_new_ssa_name (new_vtype,
- vect_simple_var);
- gimple_set_lhs (call, var);
- vect_finish_stmt_generation (vinfo, stmt_info, call,
- gsi);
- tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
- new_stmt
- = gimple_build_assign (vec_dest,
- VIEW_CONVERT_EXPR, op);
- }
+ tree new_vtype = build_vector_type_for_mode (
+ unsigned_intQI_type_node, new_vmode);
+ tree var
+ = vect_get_new_ssa_name (new_vtype, vect_simple_var);
+ gimple_set_lhs (call, var);
+ vect_finish_stmt_generation (vinfo, stmt_info, call,
+ gsi);
+ tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
+ new_stmt = gimple_build_assign (vec_dest,
+ VIEW_CONVERT_EXPR, op);
}
- else if (final_mask)
+ }
+ else if (final_mask)
+ {
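+ /* Emit: MASK_LOAD (DATAREF_PTR, ALIAS_PTR, FINAL_MASK); the
+ LHS is set in the common code below. */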
+ tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+ gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
+ dataref_ptr, ptr,
+ final_mask);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ data_ref = NULL_TREE;
+ }
+ else
+ {
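+ /* Plain contiguous (or reversed) load through a MEM_REF;
+ with an SLP gap we may load just the lower half of the
+ vector, see below. */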
+ tree ltype = vectype;
+ tree new_vtype = NULL_TREE;
+ unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
+ unsigned int vect_align
+ = vect_known_alignment_in_bytes (first_dr_info, vectype);
+ unsigned int scalar_dr_size
+ = vect_get_scalar_dr_size (first_dr_info);
+ /* If there's no peeling for gaps but we have a gap
+ with slp loads then load the lower half of the
+ vector only. See get_group_load_store_type for
+ when we apply this optimization. */
+ if (slp
+ && loop_vinfo
+ && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
+ && known_eq (nunits, (group_size - gap) * 2)
+ && known_eq (nunits, group_size)
+ && gap >= (vect_align / scalar_dr_size))
+ {
+ tree half_vtype;
+ new_vtype
+ = vector_vector_composition_type (vectype, 2,
+ &half_vtype);
+ if (new_vtype != NULL_TREE)
+ ltype = half_vtype;
+ }
+ tree offset
+ = (dataref_offset ? dataref_offset
+ : build_int_cst (ref_type, 0));
+ if (ltype != vectype
+ && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
{
- tree ptr = build_int_cst (ref_type,
- align * BITS_PER_UNIT);
- gcall *call
- = gimple_build_call_internal (IFN_MASK_LOAD, 3,
- dataref_ptr, ptr,
- final_mask);
- gimple_call_set_nothrow (call, true);
- new_stmt = call;
- data_ref = NULL_TREE;
+ unsigned HOST_WIDE_INT gap_offset
+ = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
+ tree gapcst = build_int_cst (ref_type, gap_offset);
+ offset = size_binop (PLUS_EXPR, offset, gapcst);
}
+ data_ref
+ = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
+ if (alignment_support_scheme == dr_aligned)
+ ;
else
+ TREE_TYPE (data_ref)
+ = build_aligned_type (TREE_TYPE (data_ref),
+ align * BITS_PER_UNIT);
+ if (ltype != vectype)
{
- tree ltype = vectype;
- tree new_vtype = NULL_TREE;
- unsigned HOST_WIDE_INT gap
- = DR_GROUP_GAP (first_stmt_info);
- unsigned int vect_align
- = vect_known_alignment_in_bytes (first_dr_info,
- vectype);
- unsigned int scalar_dr_size
- = vect_get_scalar_dr_size (first_dr_info);
- /* If there's no peeling for gaps but we have a gap
- with slp loads then load the lower half of the
- vector only. See get_group_load_store_type for
- when we apply this optimization. */
- if (slp
- && loop_vinfo
- && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- && gap != 0
- && known_eq (nunits, (group_size - gap) * 2)
- && known_eq (nunits, group_size)
- && gap >= (vect_align / scalar_dr_size))
+ vect_copy_ref_info (data_ref,
+ DR_REF (first_dr_info->dr));
+ tree tem = make_ssa_name (ltype);
+ new_stmt = gimple_build_assign (tem, data_ref);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+ gsi);
+ data_ref = NULL;
+ vec<constructor_elt, va_gc> *v;
+ vec_alloc (v, 2);
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
{
- tree half_vtype;
- new_vtype
- = vector_vector_composition_type (vectype, 2,
- &half_vtype);
- if (new_vtype != NULL_TREE)
- ltype = half_vtype;
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (ltype));
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
}
- tree offset
- = (dataref_offset ? dataref_offset
- : build_int_cst (ref_type, 0));
- if (ltype != vectype
- && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ else
{
- unsigned HOST_WIDE_INT gap_offset
- = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
- tree gapcst = build_int_cst (ref_type, gap_offset);
- offset = size_binop (PLUS_EXPR, offset, gapcst);
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (ltype));
}
- data_ref
- = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
- if (alignment_support_scheme == dr_aligned)
- ;
+ gcc_assert (new_vtype != NULL_TREE);
+ if (new_vtype == vectype)
+ new_stmt = gimple_build_assign (
+ vec_dest, build_constructor (vectype, v));
else
- TREE_TYPE (data_ref)
- = build_aligned_type (TREE_TYPE (data_ref),
- align * BITS_PER_UNIT);
- if (ltype != vectype)
{
- vect_copy_ref_info (data_ref,
- DR_REF (first_dr_info->dr));
- tree tem = make_ssa_name (ltype);
- new_stmt = gimple_build_assign (tem, data_ref);
+ tree new_vname = make_ssa_name (new_vtype);
+ new_stmt = gimple_build_assign (
+ new_vname, build_constructor (new_vtype, v));
vect_finish_stmt_generation (vinfo, stmt_info,
new_stmt, gsi);
- data_ref = NULL;
- vec<constructor_elt, va_gc> *v;
- vec_alloc (v, 2);
- if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
- {
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
- build_zero_cst (ltype));
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
- }
- else
- {
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
- build_zero_cst (ltype));
- }
- gcc_assert (new_vtype != NULL_TREE);
- if (new_vtype == vectype)
- new_stmt = gimple_build_assign (
- vec_dest, build_constructor (vectype, v));
- else
- {
- tree new_vname = make_ssa_name (new_vtype);
- new_stmt = gimple_build_assign (
- new_vname, build_constructor (new_vtype, v));
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- new_stmt = gimple_build_assign (
- vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
- new_vname));
- }
+ new_stmt = gimple_build_assign (
+ vec_dest,
+ build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
}
}
- break;
}
- case dr_explicit_realign:
- {
- if (costing_p)
- break;
- tree ptr, bump;
-
- tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+ break;
+ }
+ case dr_explicit_realign:
+ {
+ if (costing_p)
+ break;
+ tree ptr, bump;
- if (compute_in_loop)
- msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
- &realignment_token,
- dr_explicit_realign,
- dataref_ptr, NULL);
+ tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
- if (TREE_CODE (dataref_ptr) == SSA_NAME)
- ptr = copy_ssa_name (dataref_ptr);
- else
- ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
- // For explicit realign the target alignment should be
- // known at compile time.
- unsigned HOST_WIDE_INT align =
- DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
- new_stmt = gimple_build_assign
- (ptr, BIT_AND_EXPR, dataref_ptr,
- build_int_cst
- (TREE_TYPE (dataref_ptr),
- -(HOST_WIDE_INT) align));
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- data_ref
- = build2 (MEM_REF, vectype, ptr,
- build_int_cst (ref_type, 0));
- vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
- vec_dest = vect_create_destination_var (scalar_dest,
- vectype);
- new_stmt = gimple_build_assign (vec_dest, data_ref);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- gimple_move_vops (new_stmt, stmt_info->stmt);
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- msq = new_temp;
-
- bump = size_binop (MULT_EXPR, vs,
- TYPE_SIZE_UNIT (elem_type));
- bump = size_binop (MINUS_EXPR, bump, size_one_node);
- ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
- stmt_info, bump);
- new_stmt = gimple_build_assign
- (NULL_TREE, BIT_AND_EXPR, ptr,
- build_int_cst
- (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
- if (TREE_CODE (ptr) == SSA_NAME)
- ptr = copy_ssa_name (ptr, new_stmt);
- else
- ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
- gimple_assign_set_lhs (new_stmt, ptr);
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- data_ref
- = build2 (MEM_REF, vectype, ptr,
- build_int_cst (ref_type, 0));
- break;
- }
- case dr_explicit_realign_optimized:
- {
- if (costing_p)
- break;
- if (TREE_CODE (dataref_ptr) == SSA_NAME)
- new_temp = copy_ssa_name (dataref_ptr);
- else
- new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
- // We should only be doing this if we know the target
- // alignment at compile time.
- unsigned HOST_WIDE_INT align =
- DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
- new_stmt = gimple_build_assign
- (new_temp, BIT_AND_EXPR, dataref_ptr,
- build_int_cst (TREE_TYPE (dataref_ptr),
- -(HOST_WIDE_INT) align));
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- data_ref
- = build2 (MEM_REF, vectype, new_temp,
- build_int_cst (ref_type, 0));
- break;
- }
- default:
- gcc_unreachable ();
- }
+ if (compute_in_loop)
+ msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
+ &realignment_token,
+ dr_explicit_realign,
+ dataref_ptr, NULL);
+
+ if (TREE_CODE (dataref_ptr) == SSA_NAME)
+ ptr = copy_ssa_name (dataref_ptr);
+ else
+ ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
+ // For explicit realign the target alignment should be
+ // known at compile time.
+ unsigned HOST_WIDE_INT align
+ = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
+ new_stmt = gimple_build_assign (
+ ptr, BIT_AND_EXPR, dataref_ptr,
+ build_int_cst (TREE_TYPE (dataref_ptr),
+ -(HOST_WIDE_INT) align));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ data_ref
+ = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
+ vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ new_stmt = gimple_build_assign (vec_dest, data_ref);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ gimple_move_vops (new_stmt, stmt_info->stmt);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ msq = new_temp;
+
+ bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
+ bump = size_binop (MINUS_EXPR, bump, size_one_node);
+ ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
+ bump);
+ new_stmt = gimple_build_assign (
+ NULL_TREE, BIT_AND_EXPR, ptr,
+ build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
+ if (TREE_CODE (ptr) == SSA_NAME)
+ ptr = copy_ssa_name (ptr, new_stmt);
+ else
+ ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
+ gimple_assign_set_lhs (new_stmt, ptr);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ data_ref
+ = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
+ break;
+ }
+ case dr_explicit_realign_optimized:
+ {
+ if (costing_p)
+ break;
+ if (TREE_CODE (dataref_ptr) == SSA_NAME)
+ new_temp = copy_ssa_name (dataref_ptr);
+ else
+ new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
+ // We should only be doing this if we know the target
+ // alignment at compile time.
+ unsigned HOST_WIDE_INT align
+ = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
+ new_stmt = gimple_build_assign (
+ new_temp, BIT_AND_EXPR, dataref_ptr,
+ build_int_cst (TREE_TYPE (dataref_ptr),
+ -(HOST_WIDE_INT) align));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ data_ref = build2 (MEM_REF, vectype, new_temp,
+ build_int_cst (ref_type, 0));
+ break;
+ }
+ default:
+ gcc_unreachable ();
+ }
- /* One common place to cost the above vect load for different
- alignment support schemes. */
- if (costing_p)
- {
- /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
- only need to take care of the first stmt, whose
- stmt_info is first_stmt_info, vec_num iterating on it
- will cover the cost for the remaining, it's consistent
- with transforming. For the prologue cost for realign,
- we only need to count it once for the whole group. */
- bool first_stmt_info_p = first_stmt_info == stmt_info;
- bool add_realign_cost = first_stmt_info_p && i == 0;
- if (memory_access_type == VMAT_CONTIGUOUS
- || memory_access_type == VMAT_CONTIGUOUS_REVERSE
- || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
- && (!grouped_load || first_stmt_info_p)))
- vect_get_load_cost (vinfo, stmt_info, 1,
- alignment_support_scheme, misalignment,
- add_realign_cost, &inside_cost,
- &prologue_cost, cost_vec, cost_vec,
- true);
- }
- else
+ /* One common place to cost the above vect load for different
+ alignment support schemes. */
+ if (costing_p)
+ {
+ /* For a grouped VMAT_CONTIGUOUS_PERMUTE load we only need
+ to cost the first stmt, whose stmt_info is
+ first_stmt_info; iterating vec_num times on it covers
+ the cost for the remaining stmts, consistent with the
+ transform. The prologue cost for realign only needs to
+ be counted once for the whole group. */
+ bool first_stmt_info_p = first_stmt_info == stmt_info;
+ bool add_realign_cost = first_stmt_info_p && i == 0;
+ if (memory_access_type == VMAT_CONTIGUOUS
+ || memory_access_type == VMAT_CONTIGUOUS_REVERSE
+ || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
+ && (!grouped_load || first_stmt_info_p)))
+ vect_get_load_cost (vinfo, stmt_info, 1,
+ alignment_support_scheme, misalignment,
+ add_realign_cost, &inside_cost,
+ &prologue_cost, cost_vec, cost_vec, true);
+ }
+ else
+ {
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ /* DATA_REF is null if we've already built the statement. */
+ if (data_ref)
{
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- /* DATA_REF is null if we've already built the statement. */
- if (data_ref)
- {
- vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
- new_stmt = gimple_build_assign (vec_dest, data_ref);
- }
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+ new_stmt = gimple_build_assign (vec_dest, data_ref);
}
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
- /* 3. Handle explicit realignment if necessary/supported.
- Create in loop:
- vec_dest = realign_load (msq, lsq, realignment_token) */
- if (!costing_p
- && (alignment_support_scheme == dr_explicit_realign_optimized
- || alignment_support_scheme == dr_explicit_realign))
- {
- lsq = gimple_assign_lhs (new_stmt);
- if (!realignment_token)
- realignment_token = dataref_ptr;
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
- msq, lsq, realignment_token);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ /* 3. Handle explicit realignment if necessary/supported.
+ Create in loop:
+ vec_dest = realign_load (msq, lsq, realignment_token) */
+ if (!costing_p
+ && (alignment_support_scheme == dr_explicit_realign_optimized
+ || alignment_support_scheme == dr_explicit_realign))
+ {
+ lsq = gimple_assign_lhs (new_stmt);
+ if (!realignment_token)
+ realignment_token = dataref_ptr;
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
+ lsq, realignment_token);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- if (alignment_support_scheme == dr_explicit_realign_optimized)
- {
- gcc_assert (phi);
- if (i == vec_num - 1 && j == ncopies - 1)
- add_phi_arg (phi, lsq,
- loop_latch_edge (containing_loop),
- UNKNOWN_LOCATION);
- msq = lsq;
- }
+ if (alignment_support_scheme == dr_explicit_realign_optimized)
+ {
+ gcc_assert (phi);
+ if (i == vec_num - 1 && j == ncopies - 1)
+ add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
+ UNKNOWN_LOCATION);
+ msq = lsq;
}
+ }
- if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+ if (costing_p)
+ inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
+ stmt_info, 0, vect_body);
+ else
{
- if (costing_p)
- inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
- stmt_info, 0, vect_body);
- else
- {
- tree perm_mask = perm_mask_for_reverse (vectype);
- new_temp
- = permute_vec_elements (vinfo, new_temp, new_temp,
- perm_mask, stmt_info, gsi);
- new_stmt = SSA_NAME_DEF_STMT (new_temp);
- }
+ tree perm_mask = perm_mask_for_reverse (vectype);
+ new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
+ perm_mask, stmt_info, gsi);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
+ }
- /* Collect vector loads and later create their permutation in
- vect_transform_grouped_load (). */
- if (!costing_p && (grouped_load || slp_perm))
- dr_chain.quick_push (new_temp);
+ /* Collect vector loads and later create their permutation in
+ vect_transform_grouped_load (). */
+ if (!costing_p && (grouped_load || slp_perm))
+ dr_chain.quick_push (new_temp);
- /* Store vector loads in the corresponding SLP_NODE. */
- if (!costing_p && slp && !slp_perm)
- slp_node->push_vec_def (new_stmt);
+ /* Store vector loads in the corresponding SLP_NODE. */
+ if (!costing_p && slp && !slp_perm)
+ slp_node->push_vec_def (new_stmt);
- /* With SLP permutation we load the gaps as well, without
- we need to skip the gaps after we manage to fully load
- all elements. group_gap_adj is DR_GROUP_SIZE here. */
- group_elt += nunits;
- if (!costing_p
- && maybe_ne (group_gap_adj, 0U)
- && !slp_perm
- && known_eq (group_elt, group_size - group_gap_adj))
- {
- poly_wide_int bump_val
- = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
- * group_gap_adj);
- if (tree_int_cst_sgn
- (vect_dr_behavior (vinfo, dr_info)->step) == -1)
- bump_val = -bump_val;
- tree bump = wide_int_to_tree (sizetype, bump_val);
- dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
- gsi, stmt_info, bump);
- group_elt = 0;
- }
- }
- /* Bump the vector pointer to account for a gap or for excess
- elements loaded for a permuted SLP load. */
+ /* With SLP permutation we load the gaps as well, without
+ we need to skip the gaps after we manage to fully load
+ all elements. group_gap_adj is DR_GROUP_SIZE here. */
+ group_elt += nunits;
if (!costing_p
&& maybe_ne (group_gap_adj, 0U)
- && slp_perm)
+ && !slp_perm
+ && known_eq (group_elt, group_size - group_gap_adj))
{
poly_wide_int bump_val
- = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
- * group_gap_adj);
- if (tree_int_cst_sgn
- (vect_dr_behavior (vinfo, dr_info)->step) == -1)
+ = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
+ if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
+ == -1)
bump_val = -bump_val;
tree bump = wide_int_to_tree (sizetype, bump_val);
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
+ group_elt = 0;
}
}
+ /* Bump the vector pointer to account for a gap or for excess
+ elements loaded for a permuted SLP load. */
+ if (!costing_p
+ && maybe_ne (group_gap_adj, 0U)
+ && slp_perm)
+ {
+ poly_wide_int bump_val
+ = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
+ if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
+ bump_val = -bump_val;
+ tree bump = wide_int_to_tree (sizetype, bump_val);
+ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+ stmt_info, bump);
+ }
if (slp && !slp_perm)
continue;
}
}
else
- {
- if (grouped_load)
- {
- if (memory_access_type != VMAT_LOAD_STORE_LANES)
+ {
+ if (grouped_load)
+ {
+ gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
+ /* We assume that the cost of a single load-lanes instruction
+ is equivalent to the cost of DR_GROUP_SIZE separate loads.
+ If a grouped access is instead being provided by a
+ load-and-permute operation, include the cost of the
+ permutes. */
+ if (costing_p && first_stmt_info == stmt_info)
{
- gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
- /* We assume that the cost of a single load-lanes instruction
- is equivalent to the cost of DR_GROUP_SIZE separate loads.
- If a grouped access is instead being provided by a
- load-and-permute operation, include the cost of the
- permutes. */
- if (costing_p && first_stmt_info == stmt_info)
- {
- /* Uses an even and odd extract operations or shuffle
- operations for each needed permute. */
- int group_size = DR_GROUP_SIZE (first_stmt_info);
- int nstmts = ceil_log2 (group_size) * group_size;
- inside_cost
- += record_stmt_cost (cost_vec, nstmts, vec_perm,
- stmt_info, 0, vect_body);
+ /* Uses an even and odd extract operations or shuffle
+ operations for each needed permute. */
+ int group_size = DR_GROUP_SIZE (first_stmt_info);
+ int nstmts = ceil_log2 (group_size) * group_size;
+ inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
+ stmt_info, 0, vect_body);
- if (dump_enabled_p ())
- dump_printf_loc (
- MSG_NOTE, vect_location,
- "vect_model_load_cost: strided group_size = %d .\n",
- group_size);
- }
- else if (!costing_p)
- vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
- group_size, gsi);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_load_cost:"
+ "strided group_size = %d .\n",
+ group_size);
+ }
+ else if (!costing_p)
+ {
+ vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
+ group_size, gsi);
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
- if (!costing_p)
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
else if (!costing_p)
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
{
gcc_assert (memory_access_type != VMAT_INVARIANT
&& memory_access_type != VMAT_ELEMENTWISE
- && memory_access_type != VMAT_STRIDED_SLP);
+ && memory_access_type != VMAT_STRIDED_SLP
+ && memory_access_type != VMAT_LOAD_STORE_LANES);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_load_cost: inside_cost = %u, "