vect: Misalign checks for gather/scatter.
author     Robin Dapp <rdapp@ventanamicro.com>
           Thu, 3 Jul 2025 09:04:29 +0000 (11:04 +0200)
committer  Robin Dapp <rdapp@ventanamicro.com>
           Thu, 24 Jul 2025 14:12:39 +0000 (16:12 +0200)
This patch adds simple misalignment checks for gather/scatter
operations.  Previously, we assumed that those perform element accesses
internally, so alignment does not matter.  The riscv vector spec,
however, explicitly states that vector operations are allowed to fault
on element-misaligned accesses.  Reasonable uarchs won't, but...

For gather/scatter we have two paths in the vectorizer:

 (1) Regular analysis based on datarefs.  Here we can also create
     strided loads.
 (2) Non-affine access where each gather index is relative to the
     initial address.

The assumption this patch works on is that once the alignment of the
first scalar access is correct, all others will fall in line, as each
index is always a multiple of the first element's size.
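
As an illustration only (the functions below are made up and not part
of the patch), two scalar loops that end up on these two paths could
look like this:

    /* Path (1): a[2 * i] has an analyzable data reference with a constant
       step, so the vectorizer can build a strided load and check the
       dataref's alignment as usual.  */
    void
    strided (int *restrict dst, int *restrict a, int n)
    {
      for (int i = 0; i < n; i++)
        dst[i] = a[2 * i];
    }

    /* Path (2): a[idx[i]] is non-affine; each element address is the base
       'a' plus a runtime index, so it becomes a gather whose offsets are
       all multiples of sizeof (a[0]).  If the first element is sufficiently
       aligned, so is every other one.  */
    void
    gather (int *restrict dst, int *restrict a, int *restrict idx, int n)
    {
      for (int i = 0; i < n; i++)
        dst[i] = a[idx[i]];
    }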

For (1) we have a dataref and can check it for alignment as in other
cases.  For (2) this patch checks the object alignment of BASE and
compares it against the natural alignment of the current vectype's unit.
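
A condensed sketch of that comparison (simplified from the change to
vect_supportable_dr_alignment below; both quantities are in bits):

    /* The alias pointer carries get_object_alignment () of the base,
       e.g. 8 for a plain char buffer; TYPE_SIZE of a 32-bit element
       type is 32.  */
    unsigned HOST_WIDE_INT scalar_align
      = tree_to_uhwi (gs_info->alias_ptr);
    unsigned HOST_WIDE_INT elem_size_bits
      = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));

    /* With 8 < 32 the scalar accesses may be element-misaligned, so the
       access counts as packed and the target hook decides whether such a
       gather/scatter is still supported.  */
    bool is_packed = scalar_align > 1 && scalar_align < elem_size_bits;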

The patch also adds a pointer argument to the gather/scatter IFNs that
contains the necessary alignment.  Most of the patch is thus mechanical
in that it merely adjusts indices.
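
For illustration (argument names are descriptive only), the call built
by pattern recognition for a masked gather changes roughly like this;
the alias pointer goes right after the base, which is why the later
argument indices all shift by one:

    before:  lhs = .MASK_GATHER_LOAD (base, offset, scale, zero, mask, else_val);
    after:   lhs = .MASK_GATHER_LOAD (base, alias_ptr, offset, scale, zero, mask, else_val);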

I tested the riscv version with a custom qemu version that faults on
element-misaligned vector accesses.  With this patch applied, there is
just a single fault left, which is due to PR120782 and which will be
addressed separately.

Bootstrapped and regtested on x86 and aarch64.  Regtested on
rv64gcv_zvl512b with and without unaligned vector support.

gcc/ChangeLog:

	* internal-fn.cc (internal_fn_len_index): Adjust indices for new
	alias_ptr param.
	(internal_fn_else_index): Ditto.
	(internal_fn_mask_index): Ditto.
	(internal_fn_stored_value_index): Ditto.
	(internal_fn_alias_ptr_index): Ditto.
	(internal_fn_offset_index): Ditto.
	(internal_fn_scale_index): Ditto.
	(internal_gather_scatter_fn_supported_p): Ditto.
	* internal-fn.h (internal_fn_alias_ptr_index): Ditto.
	* optabs-query.cc (supports_vec_gather_load_p): Ditto.
	* tree-vect-data-refs.cc (vect_check_gather_scatter): Add alias
	pointer.
	* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Add
	alias pointer.
	* tree-vect-slp.cc (vect_get_operand_map): Adjust for alias
	pointer.
	* tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Add
	alias pointer and misalignment handling.
	(get_load_store_type): Move from here...
	(get_group_load_store_type): ...to here.
	(vectorizable_store): Add alias pointer.
	(vectorizable_load): Ditto.
	* tree-vectorizer.h (struct gather_scatter_info): Ditto.

gcc/internal-fn.cc
gcc/internal-fn.h
gcc/optabs-query.cc
gcc/tree-vect-data-refs.cc
gcc/tree-vect-patterns.cc
gcc/tree-vect-slp.cc
gcc/tree-vect-stmts.cc
gcc/tree-vectorizer.h

gcc/internal-fn.cc
index 1411f4497892a74a3d3ae4b9f13928331b313be7..bf2fac8180706ec418de7eb97cd1260f1d078c03 100644 (file)
@@ -4967,11 +4967,13 @@ internal_fn_len_index (internal_fn fn)
       return 2;
 
     case IFN_MASK_LEN_SCATTER_STORE:
+      return 6;
+
     case IFN_MASK_LEN_STRIDED_LOAD:
       return 5;
 
     case IFN_MASK_LEN_GATHER_LOAD:
-      return 6;
+      return 7;
 
     case IFN_COND_LEN_FMA:
     case IFN_COND_LEN_FMS:
@@ -5075,7 +5077,7 @@ internal_fn_else_index (internal_fn fn)
 
     case IFN_MASK_GATHER_LOAD:
     case IFN_MASK_LEN_GATHER_LOAD:
-      return 5;
+      return 6;
 
     default:
       return -1;
@@ -5110,7 +5112,7 @@ internal_fn_mask_index (internal_fn fn)
     case IFN_MASK_SCATTER_STORE:
     case IFN_MASK_LEN_GATHER_LOAD:
     case IFN_MASK_LEN_SCATTER_STORE:
-      return 4;
+      return 5;
 
     case IFN_VCOND_MASK:
     case IFN_VCOND_MASK_LEN:
@@ -5135,10 +5137,11 @@ internal_fn_stored_value_index (internal_fn fn)
 
     case IFN_MASK_STORE:
     case IFN_MASK_STORE_LANES:
+      return 3;
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
     case IFN_MASK_LEN_SCATTER_STORE:
-      return 3;
+      return 4;
 
     case IFN_LEN_STORE:
       return 4;
@@ -5152,6 +5155,28 @@ internal_fn_stored_value_index (internal_fn fn)
     }
 }
 
+/* If FN has an alias pointer return its index, otherwise return -1.  */
+
+int
+internal_fn_alias_ptr_index (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_MASK_LOAD:
+    case IFN_MASK_LEN_LOAD:
+    case IFN_GATHER_LOAD:
+    case IFN_MASK_GATHER_LOAD:
+    case IFN_MASK_LEN_GATHER_LOAD:
+    case IFN_SCATTER_STORE:
+    case IFN_MASK_SCATTER_STORE:
+    case IFN_MASK_LEN_SCATTER_STORE:
+      return 1;
+
+    default:
+      return -1;
+    }
+}
+
 /* If FN is a gather/scatter return the index of its offset argument,
    otherwise return -1.  */
 
@@ -5169,7 +5194,7 @@ internal_fn_offset_index (internal_fn fn)
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
     case IFN_MASK_LEN_SCATTER_STORE:
-      return 1;
+      return 2;
 
     default:
       return -1;
@@ -5193,7 +5218,7 @@ internal_fn_scale_index (internal_fn fn)
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
     case IFN_MASK_LEN_SCATTER_STORE:
-      return 2;
+      return 3;
 
     default:
       return -1;
@@ -5277,13 +5302,9 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
     && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
     && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale));
 
-  /* For gather the optab's operand indices do not match the IFN's because
-     the latter does not have the extension operand (operand 3).  It is
-     implicitly added during expansion so we use the IFN's else index + 1.
-     */
   if (ok && elsvals)
     get_supported_else_vals
-      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
+      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
 
   return ok;
 }
gcc/internal-fn.h
index 825381660bb1b1a88bc4b52ff733f120741b87e5..fd21694dfebfb8518810fd85f7aa8c45dd4c362e 100644 (file)
@@ -242,6 +242,7 @@ extern int internal_fn_else_index (internal_fn);
 extern int internal_fn_stored_value_index (internal_fn);
 extern int internal_fn_offset_index (internal_fn fn);
 extern int internal_fn_scale_index (internal_fn fn);
+extern int internal_fn_alias_ptr_index (internal_fn fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
                                                    tree, tree, int,
                                                    vec<int> * = nullptr);
gcc/optabs-query.cc
index f5ca98da818a7ae2f85c9fedd668afc97184e2e9..5335d0d8401a37a4a1f9e5ce376955e9213c484c 100644 (file)
@@ -719,13 +719,9 @@ supports_vec_gather_load_p (machine_mode mode, vec<int> *elsvals)
        = (icode != CODE_FOR_nothing) ? 1 : -1;
     }
 
-  /* For gather the optab's operand indices do not match the IFN's because
-     the latter does not have the extension operand (operand 3).  It is
-     implicitly added during expansion so we use the IFN's else index + 1.
-     */
   if (elsvals && icode != CODE_FOR_nothing)
     get_supported_else_vals
-      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
+      (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
 
   return this_fn_optabs->supports_vec_gather_load[mode] > 0;
 }
gcc/tree-vect-data-refs.cc
index 68e498263115943caf5c10cf701fdf1dc2b3bc1f..824b5f0f76983e353b008fdf0500b8a915520df5 100644 (file)
@@ -4539,6 +4539,8 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
   info->ifn = gimple_call_internal_fn (call);
   info->decl = NULL_TREE;
   info->base = gimple_call_arg (call, 0);
+  info->alias_ptr = gimple_call_arg
+                    (call, internal_fn_alias_ptr_index (info->ifn));
   info->offset = gimple_call_arg
                  (call, internal_fn_offset_index (info->ifn));
   info->offset_dt = vect_unknown_def_type;
@@ -4865,6 +4867,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
   info->ifn = ifn;
   info->decl = decl;
   info->base = base;
+
+  info->alias_ptr = build_int_cst
+    (reference_alias_ptr_type (DR_REF (dr)),
+     get_object_alignment (DR_REF (dr)));
+
   info->offset = off;
   info->offset_dt = vect_unknown_def_type;
   info->offset_vectype = offset_vectype;
@@ -7353,13 +7360,14 @@ vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
    alignment.
    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
    it is aligned, i.e., check if it is possible to vectorize it with different
-   alignment.  */
+   alignment.  If GS_INFO is passed we are dealing with a gather/scatter.  */
 
 enum dr_alignment_support
 vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
-                              tree vectype, int misalignment)
+                              tree vectype, int misalignment,
+                              gather_scatter_info *gs_info)
 {
-  data_reference *dr = dr_info->dr;
+  data_reference *dr = dr_info ? dr_info->dr : nullptr;
   stmt_vec_info stmt_info = dr_info->stmt;
   machine_mode mode = TYPE_MODE (vectype);
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
@@ -7371,14 +7379,6 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
   else if (dr_safe_speculative_read_required (stmt_info))
     return dr_unaligned_unsupported;
 
-  /* For now assume all conditional loads/stores support unaligned
-     access without any special code.  */
-  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
-    if (gimple_call_internal_p (stmt)
-       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
-           || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
-      return dr_unaligned_supported;
-
   if (loop_vinfo)
     {
       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -7448,7 +7448,7 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
         }
     } */
 
-  if (DR_IS_READ (dr))
+  if (dr && DR_IS_READ (dr))
     {
       if (can_implement_p (vec_realign_load_optab, mode)
          && (!targetm.vectorize.builtin_mask_for_load
@@ -7476,10 +7476,43 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
 
   bool is_packed = false;
   tree type = TREE_TYPE (DR_REF (dr));
+  bool is_gather_scatter = gs_info != nullptr;
   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
-    is_packed = not_size_aligned (DR_REF (dr));
+    {
+      if (!is_gather_scatter || dr != nullptr)
+       is_packed = not_size_aligned (DR_REF (dr));
+      else
+       {
+         /* Gather-scatter accesses normally perform only component accesses
+            so alignment is irrelevant for them.  Targets like riscv do care
+            about scalar alignment in vector accesses, though, so check scalar
+            alignment here.  We determined the alias pointer as well as the
+            base alignment during pattern recognition and can re-use it here.
+
+            As we do not have an analyzed dataref we only know the alignment
+            of the reference itself and nothing about init, steps, etc.
+            For now don't try harder to determine misalignment and
+            just assume it is unknown.  We consider the type packed if its
+            scalar alignment is lower than the natural alignment of a vector
+            element's type.  */
+
+         gcc_assert (!GATHER_SCATTER_LEGACY_P (*gs_info));
+         gcc_assert (dr == nullptr);
+
+         tree inner_vectype = TREE_TYPE (vectype);
+
+         unsigned HOST_WIDE_INT scalar_align
+           = tree_to_uhwi (gs_info->alias_ptr);
+         unsigned HOST_WIDE_INT inner_vectype_sz
+           = tree_to_uhwi (TYPE_SIZE (inner_vectype));
+
+         bool is_misaligned = scalar_align < inner_vectype_sz;
+         is_packed = scalar_align > 1 && is_misaligned;
+       }
+    }
   if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
-                                                    is_packed, false))
+                                                    is_packed,
+                                                    is_gather_scatter))
     return dr_unaligned_supported;
 
   /* Unsupported.  */
gcc/tree-vect-patterns.cc
index 0f6d6b77ea1ae729c397d4db6a3778fabe9ebdfa..f0ddbf9660c2e161336f26210b214d2fc7d1f420 100644 (file)
@@ -6042,12 +6042,14 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
 
          tree vec_els
            = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
-         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
+         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
+                                                    gs_info.alias_ptr,
                                                     offset, scale, zero, mask,
                                                     vec_els);
        }
       else
-       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
+       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
+                                                  gs_info.alias_ptr,
                                                   offset, scale, zero);
       tree lhs = gimple_get_lhs (stmt_info->stmt);
       tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
@@ -6057,12 +6059,13 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
     {
       tree rhs = vect_get_store_rhs (stmt_info);
       if (mask != NULL)
-       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
-                                                  base, offset, scale, rhs,
-                                                  mask);
+       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
+                                                  base, gs_info.alias_ptr,
+                                                  offset, scale, rhs, mask);
       else
-       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
-                                                  base, offset, scale, rhs);
+       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
+                                                  base, gs_info.alias_ptr,
+                                                  offset, scale, rhs);
     }
   gimple_call_set_nothrow (pattern_stmt, true);
 
gcc/tree-vect-slp.cc
index cfa841b3ce232e2641f92a8630082de1aa271527..2ef4050e2d1ed7c4d09bdcff82f61db68ffdee4d 100644 (file)
@@ -511,11 +511,11 @@ vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb)
 
 static const int no_arg_map[] = { 0 };
 static const int arg0_map[] = { 1, 0 };
-static const int arg1_map[] = { 1, 1 };
+static const int arg2_map[] = { 1, 2 };
 static const int arg2_arg3_map[] = { 2, 2, 3 };
-static const int arg1_arg3_map[] = { 2, 1, 3 };
-static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
-static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 };
+static const int arg2_arg4_map[] = { 2, 2, 4 };
+static const int arg2_arg5_arg6_map[] = { 3, 2, 5, 6 };
+static const int arg2_arg4_arg5_map[] = { 3, 2, 4, 5 };
 static const int arg3_arg2_map[] = { 2, 3, 2 };
 static const int op1_op0_map[] = { 2, 1, 0 };
 static const int off_map[] = { 1, GATHER_SCATTER_OFFSET };
@@ -570,18 +570,18 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
            return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
 
          case IFN_GATHER_LOAD:
-           return arg1_map;
+           return arg2_map;
 
          case IFN_MASK_GATHER_LOAD:
          case IFN_MASK_LEN_GATHER_LOAD:
-           return arg1_arg4_arg5_map;
+           return arg2_arg5_arg6_map;
 
          case IFN_SCATTER_STORE:
-           return arg1_arg3_map;
+           return arg2_arg4_map;
 
          case IFN_MASK_SCATTER_STORE:
          case IFN_MASK_LEN_SCATTER_STORE:
-           return arg1_arg3_arg4_map;
+           return arg2_arg4_arg5_map;
 
          case IFN_MASK_STORE:
            return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
gcc/tree-vect-stmts.cc
index 5b8168fdea14184031bec47174b900c4f5039794..e720331ffbe7594616e56ead6a25092d0d5e1e00 100644 (file)
@@ -1803,6 +1803,9 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
         but we don't need to store that here.  */
       gs_info->base = NULL_TREE;
+      gs_info->alias_ptr = build_int_cst
+       (reference_alias_ptr_type (DR_REF (dr)),
+        get_object_alignment (DR_REF (dr)));
       gs_info->element_type = TREE_TYPE (vectype);
       gs_info->offset = fold_convert (offset_type, step);
       gs_info->offset_dt = vect_constant_def;
@@ -2106,7 +2109,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
        separated by the stride, until we have a complete vector.
        Fall back to scalar accesses if that isn't possible.  */
     *memory_access_type = VMAT_STRIDED_SLP;
-  else
+  else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
       int cmp = compare_step_with_zero (vinfo, stmt_info);
       if (cmp < 0)
@@ -2349,19 +2352,71 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
      allows us to use contiguous accesses.  */
   if ((*memory_access_type == VMAT_ELEMENTWISE
        || *memory_access_type == VMAT_STRIDED_SLP)
+      && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
       && single_element_p
       && SLP_TREE_LANES (slp_node) == 1
       && loop_vinfo
       && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
                                             masked_p, gs_info, elsvals))
     *memory_access_type = VMAT_GATHER_SCATTER;
+  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+    {
+      *memory_access_type = VMAT_GATHER_SCATTER;
+      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
+                                     elsvals))
+       gcc_unreachable ();
+      /* When using internal functions, we rely on pattern recognition
+        to convert the type of the offset to the type that the target
+        requires, with the result being a call to an internal function.
+        If that failed for some reason (e.g. because another pattern
+        took priority), just handle cases in which the offset already
+        has the right type.  */
+      else if (GATHER_SCATTER_IFN_P (*gs_info)
+              && !is_gimple_call (stmt_info->stmt)
+              && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
+                                         TREE_TYPE (gs_info->offset_vectype)))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "%s offset requires a conversion\n",
+                            vls_type == VLS_LOAD ? "gather" : "scatter");
+         return false;
+       }
+      else if (!vect_is_simple_use (gs_info->offset, vinfo,
+                                   &gs_info->offset_dt,
+                                   &gs_info->offset_vectype))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "%s index use not simple.\n",
+                            vls_type == VLS_LOAD ? "gather" : "scatter");
+         return false;
+       }
+      else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+       {
+         if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+             || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
+             || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
+             || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
+                                        (gs_info->offset_vectype),
+                                      TYPE_VECTOR_SUBPARTS (vectype)))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "unsupported vector types for emulated "
+                                "gather.\n");
+             return false;
+           }
+       }
+    }
 
   if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
     *poffset = neg_ldst_offset;
 
-  if (*memory_access_type == VMAT_GATHER_SCATTER
-      || *memory_access_type == VMAT_ELEMENTWISE
+  if (*memory_access_type == VMAT_ELEMENTWISE
+      || (*memory_access_type == VMAT_GATHER_SCATTER
+         && GATHER_SCATTER_LEGACY_P (*gs_info))
       || *memory_access_type == VMAT_STRIDED_SLP
       || *memory_access_type == VMAT_INVARIANT)
     {
@@ -2370,10 +2425,15 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
     }
   else
     {
-      *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
+      if (*memory_access_type == VMAT_GATHER_SCATTER
+         && !first_dr_info)
+       *misalignment = DR_MISALIGNMENT_UNKNOWN;
+      else
+       *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
       *alignment_support_scheme
-       = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
-                                        *misalignment);
+       = vect_supportable_dr_alignment
+          (vinfo, first_dr_info, vectype, *misalignment,
+           *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr);
     }
 
   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
@@ -2443,58 +2503,12 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   *misalignment = DR_MISALIGNMENT_UNKNOWN;
   *poffset = 0;
-  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-    {
-      *memory_access_type = VMAT_GATHER_SCATTER;
-      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
-                                     elsvals))
-       gcc_unreachable ();
-      /* When using internal functions, we rely on pattern recognition
-        to convert the type of the offset to the type that the target
-        requires, with the result being a call to an internal function.
-        If that failed for some reason (e.g. because another pattern
-        took priority), just handle cases in which the offset already
-        has the right type.  */
-      else if (GATHER_SCATTER_IFN_P (*gs_info)
-              && !is_gimple_call (stmt_info->stmt)
-              && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
-                                         TREE_TYPE (gs_info->offset_vectype)))
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "%s offset requires a conversion\n",
-                            vls_type == VLS_LOAD ? "gather" : "scatter");
-         return false;
-       }
-      slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
-      gs_info->offset_dt = SLP_TREE_DEF_TYPE (offset_node);
-      gs_info->offset_vectype = SLP_TREE_VECTYPE (offset_node);
-      if (gs_info->ifn == IFN_LAST && !gs_info->decl)
-       {
-         if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
-             || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
-             || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
-             || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
-                                        (gs_info->offset_vectype),
-                                      TYPE_VECTOR_SUBPARTS (vectype)))
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "unsupported vector types for emulated "
-                                "gather.\n");
-             return false;
-           }
-       }
-      /* Gather-scatter accesses perform only component accesses, alignment
-        is irrelevant for them.  */
-      *alignment_support_scheme = dr_unaligned_supported;
-    }
-  else if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
-                                      masked_p,
-                                      vls_type, memory_access_type, poffset,
-                                      alignment_support_scheme,
-                                      misalignment, gs_info, lanes_ifn,
-                                      elsvals))
+  if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
+                                 masked_p,
+                                 vls_type, memory_access_type, poffset,
+                                 alignment_support_scheme,
+                                 misalignment, gs_info, lanes_ifn,
+                                 elsvals))
     return false;
 
   if ((*memory_access_type == VMAT_ELEMENTWISE
@@ -2528,17 +2542,18 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
                           "alignment. With non-contiguous memory vectorization"
                           " could read out of bounds at %G ",
                           STMT_VINFO_STMT (stmt_info));
-       if (inbounds)
-         LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
-       else
-         return false;
+      if (inbounds)
+       LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+      else
+       return false;
     }
 
   /* If this DR needs alignment for correctness, we must ensure the target
      alignment is a constant power-of-two multiple of the amount read per
      vector iteration or force masking.  */
   if (dr_safe_speculative_read_required (stmt_info)
-      && *alignment_support_scheme == dr_aligned)
+      && (*alignment_support_scheme == dr_aligned
+         && *memory_access_type != VMAT_GATHER_SCATTER))
     {
       /* We can only peel for loops, of course.  */
       gcc_checking_assert (loop_vinfo);
@@ -8178,7 +8193,6 @@ vectorizable_store (vec_info *vinfo,
 
       if (dump_enabled_p ()
          && memory_access_type != VMAT_ELEMENTWISE
-         && memory_access_type != VMAT_GATHER_SCATTER
          && memory_access_type != VMAT_STRIDED_SLP
          && memory_access_type != VMAT_INVARIANT
          && alignment_support_scheme != dr_aligned)
@@ -8878,24 +8892,31 @@ vectorizable_store (vec_info *vinfo,
                {
                  if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
                    call = gimple_build_call_internal (
-                           IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
+                           IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
+                           gs_info.alias_ptr,
                            vec_offset, scale, vec_oprnd, final_mask, final_len,
                            bias);
                  else
                    /* Non-vector offset indicates that prefer to take
                       MASK_LEN_STRIDED_STORE instead of the
-                      IFN_MASK_SCATTER_STORE with direct stride arg.  */
+                      IFN_MASK_SCATTER_STORE with direct stride arg.
+                      Similar to the gather case we have checked the
+                      alignment for a scatter already and assume
+                      that the strided store has the same requirements.  */
                    call = gimple_build_call_internal (
                            IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
                            vec_offset, vec_oprnd, final_mask, final_len, bias);
                }
              else if (final_mask)
                call = gimple_build_call_internal
-                            (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
+                            (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
+                             gs_info.alias_ptr,
                              vec_offset, scale, vec_oprnd, final_mask);
              else
-               call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
-                                                  dataref_ptr, vec_offset,
+               call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
+                                                  dataref_ptr,
+                                                  gs_info.alias_ptr,
+                                                  vec_offset,
                                                   scale, vec_oprnd);
              gimple_call_set_nothrow (call, true);
              vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
@@ -10362,7 +10383,6 @@ vectorizable_load (vec_info *vinfo,
       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
     }
 
-  gcc_assert (alignment_support_scheme);
   vec_loop_masks *loop_masks
     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
        ? &LOOP_VINFO_MASKS (loop_vinfo)
@@ -10382,10 +10402,12 @@ vectorizable_load (vec_info *vinfo,
 
   /* Targets with store-lane instructions must not require explicit
      realignment.  vect_supportable_dr_alignment always returns either
-     dr_aligned or dr_unaligned_supported for masked operations.  */
+     dr_aligned or dr_unaligned_supported for (non-length) masked
+     operations.  */
   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
               && !mask
               && !loop_masks)
+             || memory_access_type == VMAT_GATHER_SCATTER
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);
 
@@ -10730,8 +10752,6 @@ vectorizable_load (vec_info *vinfo,
 
   if (memory_access_type == VMAT_GATHER_SCATTER)
     {
-      gcc_assert (alignment_support_scheme == dr_aligned
-                 || alignment_support_scheme == dr_unaligned_supported);
       gcc_assert (!grouped_load && !slp_perm);
 
       unsigned int inside_cost = 0, prologue_cost = 0;
@@ -10820,7 +10840,8 @@ vectorizable_load (vec_info *vinfo,
                {
                  if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
                    call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
-                                                      8, dataref_ptr,
+                                                      9, dataref_ptr,
+                                                      gs_info.alias_ptr,
                                                       vec_offset, scale, zero,
                                                       final_mask, vec_els,
                                                       final_len, bias);
@@ -10835,13 +10856,15 @@ vectorizable_load (vec_info *vinfo,
                }
              else if (final_mask)
                call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
-                                                  6, dataref_ptr,
+                                                  7, dataref_ptr,
+                                                  gs_info.alias_ptr,
                                                   vec_offset, scale,
                                                   zero, final_mask, vec_els);
              else
-               call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
-                                                  dataref_ptr, vec_offset,
-                                                  scale, zero);
+               call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
+                                                  dataref_ptr,
+                                                  gs_info.alias_ptr,
+                                                  vec_offset, scale, zero);
              gimple_call_set_nothrow (call, true);
              new_stmt = call;
              data_ref = NULL_TREE;
gcc/tree-vectorizer.h
index 0f9b529fcea559abad467f5c297e7dff9a219d51..c94418f2d7ce6062a05fe86069e8a8df383842cb 100644 (file)
@@ -1557,6 +1557,10 @@ struct gather_scatter_info {
   /* The loop-invariant base value.  */
   tree base;
 
+  /* The TBAA alias pointer, the value of which determines the alignment
+     of the scalar accesses.  */
+  tree alias_ptr;
+
   /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
   tree offset;
 
@@ -2542,7 +2546,8 @@ extern bool ref_within_array_bound (gimple *, tree);
 /* In tree-vect-data-refs.cc.  */
 extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
 extern enum dr_alignment_support vect_supportable_dr_alignment
-                                  (vec_info *, dr_vec_info *, tree, int);
+                                  (vec_info *, dr_vec_info *, tree, int,
+                                    gather_scatter_info * = nullptr);
 extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
 extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
 extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);