struct pd_data
{
tree rhs;
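+  /* Offset in bits into RHS from which the bytes for this partial
+     definition are taken; non-zero when only a slice of RHS (for
+     example a run of vector lanes) provides the value.  */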
+ HOST_WIDE_INT rhs_off;
HOST_WIDE_INT offset;
HOST_WIDE_INT size;
};
unsigned int pos = 0, prec = w.get_precision ();
pd_data pd;
pd.rhs = build_constructor (NULL_TREE, NULL);
+ pd.rhs_off = 0;
/* When bitwise and with a constant is done on a memory load,
we don't really need all the bits to be defined or defined
to constants, we don't really care what is in the position
bool pd_constant_p = (TREE_CODE (pd.rhs) == CONSTRUCTOR
|| CONSTANT_CLASS_P (pd.rhs));
+ pd_range *r;
if (partial_defs.is_empty ())
{
/* If we get a clobber upfront, fail. */
first_set = set;
first_base_set = base_set;
last_vuse_ptr = NULL;
- /* Continue looking for partial defs. */
- return NULL;
- }
-
- if (!known_ranges)
- {
- /* ??? Optimize the case where the 2nd partial def completes things. */
- gcc_obstack_init (&ranges_obstack);
- known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
- pd_tree_alloc,
- pd_tree_dealloc, this);
- splay_tree_insert (known_ranges,
- (splay_tree_key)&first_range.offset,
- (splay_tree_value)&first_range);
- }
-
- pd_range newr = { pd.offset, pd.size };
- splay_tree_node n;
- pd_range *r;
- /* Lookup the predecessor of offset + 1 and see if we need to merge. */
- HOST_WIDE_INT loffset = newr.offset + 1;
- if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset))
- && ((r = (pd_range *)n->value), true)
- && ranges_known_overlap_p (r->offset, r->size + 1,
- newr.offset, newr.size))
- {
- /* Ignore partial defs already covered. Here we also drop shadowed
- clobbers arriving here at the floor. */
- if (known_subrange_p (newr.offset, newr.size, r->offset, r->size))
- return NULL;
- r->size = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset;
+ r = &first_range;
+ /* Go check if the first partial definition was a full one in case
+ the caller didn't optimize for this. */
}
else
{
- /* newr.offset wasn't covered yet, insert the range. */
- r = XOBNEW (&ranges_obstack, pd_range);
- *r = newr;
- splay_tree_insert (known_ranges, (splay_tree_key)&r->offset,
- (splay_tree_value)r);
- }
- /* Merge r which now contains newr and is a member of the splay tree with
- adjacent overlapping ranges. */
- pd_range *rafter;
- while ((n = splay_tree_successor (known_ranges, (splay_tree_key)&r->offset))
- && ((rafter = (pd_range *)n->value), true)
- && ranges_known_overlap_p (r->offset, r->size + 1,
- rafter->offset, rafter->size))
- {
- r->size = MAX (r->offset + r->size,
- rafter->offset + rafter->size) - r->offset;
- splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset);
- }
- /* If we get a clobber, fail. */
- if (TREE_CLOBBER_P (pd.rhs))
- return (void *)-1;
- /* Non-constants are OK as long as they are shadowed by a constant. */
- if (!pd_constant_p)
- return (void *)-1;
- partial_defs.safe_push (pd);
+ if (!known_ranges)
+ {
+ /* ??? Optimize the case where the 2nd partial def completes
+ things. */
+ gcc_obstack_init (&ranges_obstack);
+ known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
+ pd_tree_alloc,
+ pd_tree_dealloc, this);
+ splay_tree_insert (known_ranges,
+ (splay_tree_key)&first_range.offset,
+ (splay_tree_value)&first_range);
+ }
+
+ pd_range newr = { pd.offset, pd.size };
+ splay_tree_node n;
+ /* Lookup the predecessor of offset + 1 and see if we need to merge. */
+ HOST_WIDE_INT loffset = newr.offset + 1;
+ if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset))
+ && ((r = (pd_range *)n->value), true)
+ && ranges_known_overlap_p (r->offset, r->size + 1,
+ newr.offset, newr.size))
+ {
+ /* Ignore partial defs already covered. Here we also drop shadowed
+ clobbers arriving here at the floor. */
+ if (known_subrange_p (newr.offset, newr.size, r->offset, r->size))
+ return NULL;
+ r->size
+ = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset;
+ }
+ else
+ {
+ /* newr.offset wasn't covered yet, insert the range. */
+ r = XOBNEW (&ranges_obstack, pd_range);
+ *r = newr;
+ splay_tree_insert (known_ranges, (splay_tree_key)&r->offset,
+ (splay_tree_value)r);
+ }
+ /* Merge r which now contains newr and is a member of the splay tree with
+ adjacent overlapping ranges. */
+ pd_range *rafter;
+ while ((n = splay_tree_successor (known_ranges,
+ (splay_tree_key)&r->offset))
+ && ((rafter = (pd_range *)n->value), true)
+ && ranges_known_overlap_p (r->offset, r->size + 1,
+ rafter->offset, rafter->size))
+ {
+ r->size = MAX (r->offset + r->size,
+ rafter->offset + rafter->size) - r->offset;
+ splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset);
+ }
+ /* If we get a clobber, fail. */
+ if (TREE_CLOBBER_P (pd.rhs))
+ return (void *)-1;
+ /* Non-constants are OK as long as they are shadowed by a constant. */
+ if (!pd_constant_p)
+ return (void *)-1;
+ partial_defs.safe_push (pd);
+ }
/* Now we have merged newr into the range tree. When we have covered
[offseti, sizei] then the tree will contain exactly one node which has
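For intuition, the splay tree is plain interval bookkeeping keyed on range
offsets; a minimal standalone sketch of the merge rule used above (plain C
with invented names, not GCC code) looks like:

  struct range { long offset, size; };

  /* True when B overlaps A or starts exactly at A's end; the size + 1
     mirrors passing r->size + 1 to ranges_known_overlap_p above so that
     adjacent ranges get coalesced too.  */
  static int
  touches (struct range a, struct range b)
  {
    return a.offset < b.offset + b.size && b.offset < a.offset + a.size + 1;
  }

  /* Grow A to also cover B, assuming a.offset <= b.offset as guaranteed
     by the predecessor/successor lookups above.  */
  static void
  merge_into (struct range *a, struct range b)
  {
    long hi = a->offset + a->size;
    long bhi = b.offset + b.size;
    a->size = (bhi > hi ? bhi : hi) - a->offset;
  }

With a known range [0, 32) and a new partial def covering [16, 48) the
predecessor lookup finds [0, 32), touches holds and the entry grows to
[0, 48); an existing successor at offset 48 of size 16 is then adjacent,
gets absorbed the same way and is removed from the tree, leaving [0, 64).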
else
{
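+	  /* In addition to any leading part of RHS that lies before the
+	     looked-up range (negative pd.offset), skip the rhs_off bits
+	     that select the relevant slice of RHS.  */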
len = native_encode_expr (pd.rhs, this_buffer, bufsize,
- MAX (0, -pd.offset) / BITS_PER_UNIT);
+ (MAX (0, -pd.offset)
+ + pd.rhs_off) / BITS_PER_UNIT);
if (len <= 0
|| len < (ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT
- MAX (0, -pd.offset) / BITS_PER_UNIT))
{
pd_data pd;
pd.rhs = build_constructor (NULL_TREE, NULL);
+ pd.rhs_off = 0;
pd.offset = offset2i;
pd.size = leni << LOG2_BITS_PER_UNIT;
return data->push_partial_def (pd, 0, 0, offseti, maxsizei);
by a later def. */
pd_data pd;
pd.rhs = gimple_assign_rhs1 (def_stmt);
+ pd.rhs_off = 0;
pd.offset = offset2i;
pd.size = size2i;
return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref),
if (TREE_CODE (rhs) == SSA_NAME)
rhs = SSA_VAL (rhs);
pd.rhs = rhs;
+ pd.rhs_off = 0;
pd.offset = offset2i;
pd.size = size2i;
return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref),
{
pd_data pd;
pd.rhs = SSA_VAL (def_rhs);
+ pd.rhs_off = 0;
pd.offset = offset2i;
pd.size = size2i;
return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref),
}
}
+  /* 4b) Assignment done via one of the vectorizer internal store
+     functions where we may be able to access pieces of the stored
+     value or combine them to a larger entity. */
+ else if (known_eq (ref->size, maxsize)
+ && is_gimple_reg_type (vr->type)
+ && !reverse_storage_order_for_component_p (vr->operands)
+ && !contains_storage_order_barrier_p (vr->operands)
+ && is_gimple_call (def_stmt)
+ && gimple_call_internal_p (def_stmt)
+ && internal_store_fn_p (gimple_call_internal_fn (def_stmt)))
+ {
+ gcall *call = as_a <gcall *> (def_stmt);
+ internal_fn fn = gimple_call_internal_fn (call);
+ tree def_rhs = gimple_call_arg (call,
+ internal_fn_stored_value_index (fn));
+ def_rhs = vn_valueize (def_rhs);
+ if (TREE_CODE (def_rhs) != VECTOR_CST)
+ return (void *)-1;
+
+ tree mask = NULL_TREE, len = NULL_TREE, bias = NULL_TREE;
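+	  /* Only stores with a constant mask or a constant length (and
+	     bias) can be decomposed into partial definitions.  */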
+ switch (fn)
+ {
+ case IFN_MASK_STORE:
+ mask = gimple_call_arg (call, internal_fn_mask_index (fn));
+ mask = vn_valueize (mask);
+ if (TREE_CODE (mask) != VECTOR_CST)
+ return (void *)-1;
+ break;
+ case IFN_LEN_STORE:
+ len = gimple_call_arg (call, 2);
+ bias = gimple_call_arg (call, 4);
+ if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias))
+ return (void *)-1;
+ break;
+ default:
+ return (void *)-1;
+ }
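+	  /* The destination of the internal store is its first argument;
+	     rebuild LHS_REF from the valueized pointer and the size in
+	     bytes of the stored vector.  */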
+ ao_ref_init_from_ptr_and_size (&lhs_ref,
+ vn_valueize (gimple_call_arg (call, 0)),
+ TYPE_SIZE_UNIT (TREE_TYPE (def_rhs)));
+ tree base2;
+ poly_int64 offset2, size2, maxsize2;
+ HOST_WIDE_INT offset2i, size2i, offseti;
+ base2 = ao_ref_base (&lhs_ref);
+ offset2 = lhs_ref.offset;
+ size2 = lhs_ref.size;
+ maxsize2 = lhs_ref.max_size;
+ if (known_size_p (maxsize2)
+ && known_eq (maxsize2, size2)
+ && adjust_offsets_for_equal_base_address (base, &offset,
+ base2, &offset2)
+ && maxsize.is_constant (&maxsizei)
+ && offset.is_constant (&offseti)
+ && offset2.is_constant (&offset2i)
+ && size2.is_constant (&size2i))
+ {
+ if (!ranges_maybe_overlap_p (offset, maxsize, offset2, size2))
+		/* Poor-man's disambiguation. */
+ return NULL;
+ else if (ranges_known_overlap_p (offset, maxsize, offset2, size2))
+ {
+ pd_data pd;
+ pd.rhs = def_rhs;
+ tree aa = gimple_call_arg (call, 1);
+ alias_set_type set = get_deref_alias_set (TREE_TYPE (aa));
+ tree vectype = TREE_TYPE (def_rhs);
+ unsigned HOST_WIDE_INT elsz
+ = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
+ if (mask)
+ {
+ HOST_WIDE_INT start = 0, len = 0;
+ unsigned mask_idx = 0;
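+		  /* Walk the mask lanes, accumulating each maximal run of
+		     active lanes as a bit range [start, start + len) and
+		     pushing it as a partial def when it overlaps the
+		     queried range, with rhs_off selecting the matching
+		     slice of the vector constant.  */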
+ do
+ {
+ if (integer_zerop (VECTOR_CST_ELT (mask, mask_idx)))
+ {
+ if (len != 0)
+ {
+ pd.rhs_off = start;
+ pd.offset = offset2i + start;
+ pd.size = len;
+ if (ranges_known_overlap_p
+ (offset, maxsize, pd.offset, pd.size))
+ {
+ void *res = data->push_partial_def
+ (pd, set, set, offseti, maxsizei);
+ if (res != NULL)
+ return res;
+ }
+ }
+ start = (mask_idx + 1) * elsz;
+ len = 0;
+ }
+ else
+ len += elsz;
+ mask_idx++;
+ }
+ while (known_lt (mask_idx, TYPE_VECTOR_SUBPARTS (vectype)));
+ if (len != 0)
+ {
+ pd.rhs_off = start;
+ pd.offset = offset2i + start;
+ pd.size = len;
+ if (ranges_known_overlap_p (offset, maxsize,
+ pd.offset, pd.size))
+ return data->push_partial_def (pd, set, set,
+ offseti, maxsizei);
+ }
+ }
+ else if (fn == IFN_LEN_STORE)
+ {
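+		  /* A LEN_STORE writes only a leading portion of the
+		     vector; its extent in bytes is derived from the len
+		     argument adjusted by the target's bias.  */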
+ pd.rhs_off = 0;
+ pd.offset = offset2i;
+ pd.size = (tree_to_uhwi (len)
+ + -tree_to_shwi (bias)) * BITS_PER_UNIT;
+ if (ranges_known_overlap_p (offset, maxsize,
+ pd.offset, pd.size))
+ return data->push_partial_def (pd, set, set,
+ offseti, maxsizei);
+ }
+ else
+ gcc_unreachable ();
+ return NULL;
+ }
+ }
+ }
+
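To make the mask decomposition concrete, the following standalone sketch
(plain C with invented names, not GCC code) computes the (rhs_off, offset,
size) triples the lane-walking loop above pushes as partial defs; all
values are in bits and base stands in for offset2i:

  #include <stdio.h>

  /* Print the partial-definition triples for a masked vector store,
     mirroring the lane-walking loop in the patch.  */
  static void
  emit_partial_defs (const int *mask, unsigned nlanes,
		     unsigned elsz /* lane size in bits */, long base)
  {
    long start = 0, len = 0;
    for (unsigned i = 0; i < nlanes; ++i)
      if (mask[i] == 0)
	{
	  if (len != 0)
	    printf ("rhs_off=%ld offset=%ld size=%ld\n",
		    start, base + start, len);
	  start = (long) (i + 1) * elsz;
	  len = 0;
	}
      else
	len += elsz;
    if (len != 0)
      printf ("rhs_off=%ld offset=%ld size=%ld\n", start, base + start, len);
  }

  int
  main (void)
  {
    /* A V4SI store at bit offset 128 with mask { -1, -1, 0, -1 }.  */
    int mask[4] = { -1, -1, 0, -1 };
    emit_partial_defs (mask, 4, 32, 128);
    return 0;
  }

This prints rhs_off=0 offset=128 size=64 and rhs_off=96 offset=224 size=32,
i.e. lanes 0-1 and lane 3 become two separate partial defs while the
masked-out lane 2 leaves a hole some other store has to fill.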
/* 5) For aggregate copies translate the reference through them if
the copy kills ref. */
else if (data->vn_walk_kind == VN_WALKREWRITE
{
pd_data pd;
pd.rhs = val;
+ pd.rhs_off = 0;
pd.offset = 0;
pd.size = maxsizei;
return data->push_partial_def (pd, ao_ref_alias_set (&lhs_ref),