/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
+static tree vector_vector_composition_type (tree, poly_uint64, tree *,
+ bool = false);
+
/* Return TRUE iff the given statement is in an inner loop relative to
the loop being vectorized. */
bool
return false;
}
+/* Return true if we can use gather/scatter or strided internal functions
+ to vectorize the grouped or strided load or store described by DR_INFO
+ and VECTYPE. The access has multiple lanes and will be implemented by a
+ type-punned access of a vector whose element size covers NELTS lanes.
+
+ MASKED_P is true if the load or store is conditional.
+ When returning true, fill in INFO with the information required to
+ perform the operation and store the punning type in PUN_VECTYPE.
+
+ If successful and ELSVALS is nonzero the supported
+ else values will be stored in the vector ELSVALS points to. */
+
+static bool
+vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
+ loop_vec_info loop_vinfo, bool masked_p,
+ unsigned int nelts,
+ gather_scatter_info *info, vec<int> *elsvals,
+ tree *pun_vectype)
+{
+ data_reference *dr = dr_info->dr;
+
+ /* TODO: We can support NELTS > BITS_PER_UNIT or a non-power-of-two
+ NELTS by using multiple gathers/scatters. */
+ if (nelts > BITS_PER_UNIT || !pow2p_hwi (nelts))
+ return false;
+
+ /* Pun VECTYPE to a type of the same size whose elements each span
+ NELTS elements of VECTYPE; e.g. the punned type of a V16QI with
+ NELTS = 4 would be V4SI. */
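+ /* PIECES = TYPE_VECTOR_SUBPARTS (VECTYPE) / NELTS is the number of
+ elements of the punned type, e.g. 16 / 4 = 4 in the example above. */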
+ tree tmp;
+ unsigned int pieces;
+ if (!can_div_trunc_p (TYPE_VECTOR_SUBPARTS (vectype), nelts, &pieces)
+ || !pieces)
+ return false;
+
+ *pun_vectype = vector_vector_composition_type (vectype, pieces, &tmp, true);
+
+ if (!*pun_vectype || !VECTOR_TYPE_P (*pun_vectype))
+ return false;
+
+ internal_fn ifn;
+ tree offset_vectype = *pun_vectype;
+
+ internal_fn strided_ifn = DR_IS_READ (dr)
+ ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
+
+ /* Check if we have a gather/scatter with the new type, for now simply
+ using the type itself as the offset type. If not, check if we have
+ a strided load/store. These have fewer constraints (for example, no
+ offset type needs to exist), so a strided load/store may be available
+ even when a gather/scatter is not. */
+ bool ok = false;
+ if (vect_gather_scatter_fn_p
+ (loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
+ TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn,
+ &offset_vectype, elsvals))
+ ok = true;
+ else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
+ elsvals))
+ {
+ /* Use gather/scatter IFNs, vect_get_strided_load_store_ops
+ will switch back to the strided variants. */
+ ifn = DR_IS_READ (dr) ? IFN_MASK_LEN_GATHER_LOAD
+ : IFN_MASK_LEN_SCATTER_STORE;
+ ok = true;
+ }
+
+ if (ok)
+ {
+ info->ifn = ifn;
+ info->decl = NULL_TREE;
+ info->base = dr->ref;
+ info->alias_ptr = build_int_cst
+ (reference_alias_ptr_type (DR_REF (dr)),
+ get_object_alignment (DR_REF (dr)));
+ info->element_type = TREE_TYPE (*pun_vectype);
+ info->offset_vectype = offset_vectype;
+ /* No need to set the offset, vect_get_strided_load_store_ops
+ will do that. */
+ info->scale = 1;
+ info->memory_type = TREE_TYPE (DR_REF (dr));
+ return true;
+ }
+
+ return false;
+}
+
/* Return true if we can use gather/scatter internal functions to
vectorize STMT_INFO, which is a grouped or strided load or store.
MASKED_P is true if load or store is conditional. When returning
/* Function VECTOR_VECTOR_COMPOSITION_TYPE
- This function returns a vector type which can be composed with NETLS pieces,
+ This function returns a vector type which can be composed with NELTS pieces,
whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
same vector size as the return vector. It checks target whether supports
pieces-size vector mode for construction firstly, if target fails to, check
pieces-size scalar mode for construction further. It returns NULL_TREE if
- fails to find the available composition.
+ fails to find the available composition. If the caller only wants
+ scalar pieces, e.g. when PTYPE is a possible gather/scatter element
+ type, SCALAR_PTYPE_ONLY must be true.
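+ In that case only a scalar integer PTYPE is returned, e.g. SI for
+ (vtype=V16QI, nelts=4), giving V4SI.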
For example, for (vtype=V16QI, nelts=4), we can probably get:
- V16QI with PTYPE V4QI.
- NULL_TREE. */
static tree
-vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
+vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype,
+ bool scalar_ptype_only)
{
gcc_assert (VECTOR_TYPE_P (vtype));
gcc_assert (known_gt (nelts, 0U));
scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
machine_mode rmode;
- if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
+ if (!scalar_ptype_only
+ && related_vector_mode (vmode, elmode, inelts).exists (&rmode)
&& (convert_optab_handler (vec_init_optab, vmode, rmode)
!= CODE_FOR_nothing))
{
/* Otherwise check if exists an integer type of the same piece size and
if vec_init optab supports construction from it directly. */
if (int_mode_for_size (pbsize, 0).exists (&elmode)
- && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
- && (convert_optab_handler (vec_init_optab, rmode, elmode)
- != CODE_FOR_nothing))
+ && related_vector_mode (vmode, elmode, nelts).exists (&rmode))
{
- *ptype = build_nonstandard_integer_type (pbsize, 1);
- return build_vector_type (*ptype, nelts);
+ if (scalar_ptype_only
+ || (convert_optab_handler (vec_init_optab, rmode, elmode)
+ != CODE_FOR_nothing))
+ {
+ *ptype = build_nonstandard_integer_type (pbsize, 1);
+ return build_vector_type (*ptype, nelts);
+ }
}
}
int *misalignment = &ls->misalignment;
internal_fn *lanes_ifn = &ls->lanes_ifn;
vec<int> *elsvals = &ls->elsvals;
+ tree *ls_type = &ls->ls_type;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
*misalignment = DR_MISALIGNMENT_UNKNOWN;
*poffset = 0;
+ *ls_type = NULL_TREE;
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
on nearby locations. Or, even if it's a win over scalar code,
it might not be a win over vectorizing at a lower VF, if that
allows us to use contiguous accesses. */
+ vect_memory_access_type grouped_gather_fallback = VMAT_UNINITIALIZED;
if (loop_vinfo
&& (*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
- && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && SLP_TREE_LANES (slp_node) == 1
- && (!SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
- || single_element_p))
+ && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
gather_scatter_info gs_info;
- if (vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
- masked_p, &gs_info, elsvals,
- group_size, single_element_p))
+ if (SLP_TREE_LANES (slp_node) == 1
+ && (!SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
+ || single_element_p)
+ && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
+ masked_p, &gs_info, elsvals,
+ group_size, single_element_p))
{
SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
SLP_TREE_GS_BASE (slp_node) = error_mark_node;
ls->strided_offset_vectype = gs_info.offset_vectype;
*memory_access_type = VMAT_GATHER_SCATTER_IFN;
}
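+ /* Otherwise, for an unmasked multi-lane group, try implementing the
+ access as a type-punned gather/scatter or strided access. Remember
+ the original access type so we can revert below if the required
+ alignment is not supported. */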
+ else if (SLP_TREE_LANES (slp_node) > 1
+ && !masked_p
+ && !single_element_p
+ && vect_use_grouped_gather (STMT_VINFO_DR_INFO (stmt_info),
+ vectype, loop_vinfo,
+ masked_p, group_size,
+ &gs_info, elsvals, ls_type))
+ {
+ SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
+ SLP_TREE_GS_BASE (slp_node) = error_mark_node;
+ grouped_gather_fallback = *memory_access_type;
+ *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+ ls->gs.ifn = gs_info.ifn;
+ vectype = *ls_type;
+ ls->strided_offset_vectype = gs_info.offset_vectype;
+ }
}
if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
= vect_supportable_dr_alignment
(vinfo, first_dr_info, vectype, *misalignment,
mat_gather_scatter_p (*memory_access_type));
+ if (grouped_gather_fallback != VMAT_UNINITIALIZED
+ && *alignment_support_scheme != dr_aligned
+ && *alignment_support_scheme != dr_unaligned_supported)
+ {
+ /* No supportable alignment for a grouped gather, so fall back to the
+ original memory access type. Even though VMAT_STRIDED_SLP might
+ also try aligned vector loads, it can still fall back to vector
+ construction from scalars. */
+ *memory_access_type = grouped_gather_fallback;
+ *alignment_support_scheme = dr_unaligned_supported;
+ *misalignment = DR_MISALIGNMENT_UNKNOWN;
+ }
}
if (overrun_p)
{
aggr_type = elem_type;
if (!costing_p)
- vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
- ls.strided_offset_vectype,
- loop_vinfo, gsi,
- &bump, &vec_offset, loop_lens);
+ {
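+ /* If the access was punned, the strided/gather operands must be
+ generated with the punned vector type. */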
+ tree vtype = ls.ls_type ? ls.ls_type : vectype;
+ vect_get_strided_load_store_ops (stmt_info, slp_node, vtype,
+ ls.strided_offset_vectype,
+ loop_vinfo, gsi,
+ &bump, &vec_offset, loop_lens);
+ }
}
else
{
if (mat_gather_scatter_p (memory_access_type))
{
- gcc_assert (!grouped_store);
+ gcc_assert (!grouped_store || ls.ls_type);
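+ /* A grouped store only reaches here when it is implemented as a
+ type-punned scatter or strided store; switch to the punned type. */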
+ if (ls.ls_type)
+ vectype = ls.ls_type;
auto_vec<tree> vec_offsets;
unsigned int inside_cost = 0, prologue_cost = 0;
int num_stmts = vec_num;
if (mask_node)
vec_mask = vec_masks[j];
/* We should have catched mismatched types earlier. */
- gcc_assert (useless_type_conversion_p (vectype,
- TREE_TYPE (vec_oprnd)));
+ gcc_assert (ls.ls_type
+ || useless_type_conversion_p
+ (vectype, TREE_TYPE (vec_oprnd)));
}
tree final_mask = NULL_TREE;
tree final_len = NULL_TREE;
}
}
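+ /* The punned store expects an operand of the punned vector type, so
+ view-convert the original-type vector operand first. */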
+ if (ls.ls_type)
+ {
+ gimple *conv_stmt
+ = gimple_build_assign (make_ssa_name (vectype),
+ VIEW_CONVERT_EXPR,
+ build1 (VIEW_CONVERT_EXPR, vectype,
+ vec_oprnd));
+ vect_finish_stmt_generation (vinfo, stmt_info, conv_stmt,
+ gsi);
+ vec_oprnd = gimple_get_lhs (conv_stmt);
+ }
+
gcall *call;
if (final_len && final_mask)
{
return true;
}
- if (mat_gather_scatter_p (memory_access_type))
+ if (mat_gather_scatter_p (memory_access_type)
+ && !ls.ls_type)
grouped_load = false;
if (grouped_load
{
group_gap_adj = group_size - scalar_lanes;
}
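+ /* Create DR_CHAIN here so the gather/scatter path below can collect
+ the loaded vectors for a possible load permutation. */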
+ dr_chain.create (vec_num);
ref_type = get_group_alias_ptr_type (first_stmt_info);
}
if (mat_gather_scatter_p (memory_access_type))
{
- gcc_assert (!grouped_load && !slp_perm);
+ gcc_assert ((!grouped_load && !slp_perm) || ls.ls_type);
+
+ /* If we pun the original vectype, the loads as well as costing,
+ length handling, etc. are performed with the punned type. After
+ loading we VIEW_CONVERT the data back to the original vectype. */
+ tree original_vectype = vectype;
+ if (ls.ls_type)
+ vectype = ls.ls_type;
/* 1. Create the vector or array pointer update chain. */
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
new_temp = new_temp2;
}
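+ /* A punned load produced a vector of the punned type; view-convert
+ it back to the vector type the rest of the SLP graph expects. */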
+ if (ls.ls_type)
+ {
+ new_stmt = gimple_build_assign (make_ssa_name
+ (original_vectype),
+ VIEW_CONVERT_EXPR,
+ build1 (VIEW_CONVERT_EXPR,
+ original_vectype,
+ new_temp));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
+
/* Store vector loads in the corresponding SLP_NODE. */
- slp_node->push_vec_def (new_stmt);
+ if (!costing_p)
+ {
+ if (slp_perm)
+ dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+ else
+ slp_node->push_vec_def (new_stmt);
+ }
+ }
+
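+ /* Finally cost or emit the load permutation on the vectors collected
+ in DR_CHAIN. */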
+ if (slp_perm)
+ {
+ if (costing_p)
+ {
+ gcc_assert (ls.n_perms != -1U);
+ inside_cost += record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
+ slp_node, 0, vect_body);
+ }
+ else
+ {
+ unsigned n_perms2;
+ vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
+ false, &n_perms2);
+ gcc_assert (ls.n_perms == n_perms2);
+ }
}
if (costing_p && dump_enabled_p ())