git.ipfire.org Git - thirdparty/gcc.git/commitdiff
vect: Move VMAT_GATHER_SCATTER handling from final loop nest
authorKewen Lin <linkw@linux.ibm.com>
Wed, 23 Aug 2023 05:09:14 +0000 (00:09 -0500)
committerKewen Lin <linkw@linux.ibm.com>
Wed, 23 Aug 2023 05:09:14 +0000 (00:09 -0500)
Like r14-3317, which moves the handling of memory access
type VMAT_GATHER_SCATTER out of the final loop nest in
vectorizable_load, this patch does the same for the
vectorizable_store side.

gcc/ChangeLog:

	* tree-vect-stmts.cc (vectorizable_store): Move the handling of
	VMAT_GATHER_SCATTER in the final loop nest to its own loop,
	and update the final nest accordingly.

gcc/tree-vect-stmts.cc

index bd8a4b9ab88fd819d3342f0c08d6a19185cf8259..413a88750d629dac6203350b8d270212f028caba 100644 (file)
@@ -8920,44 +8920,23 @@ vectorizable_store (vec_info *vinfo,
       return true;
     }
 
-  auto_vec<tree> result_chain (group_size);
-  auto_vec<tree> vec_offsets;
-  auto_vec<tree, 1> vec_oprnds;
-  for (j = 0; j < ncopies; j++)
+  if (memory_access_type == VMAT_GATHER_SCATTER)
     {
-      gimple *new_stmt;
-      if (j == 0)
+      gcc_assert (!slp && !grouped_store);
+      auto_vec<tree> vec_offsets;
+      for (j = 0; j < ncopies; j++)
        {
-         if (slp)
-           {
-             /* Get vectorized arguments for SLP_NODE.  */
-             vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
-                                &vec_oprnds);
-             vec_oprnd = vec_oprnds[0];
-           }
-         else
+         gimple *new_stmt;
+         if (j == 0)
            {
-             /* For interleaved stores we collect vectorized defs for all the
-                stores in the group in DR_CHAIN. DR_CHAIN is then used as an
-                input to vect_permute_store_chain().
-
-                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
-                is of size 1.  */
-             stmt_vec_info next_stmt_info = first_stmt_info;
-             for (i = 0; i < group_size; i++)
-               {
-                 /* Since gaps are not supported for interleaved stores,
-                    DR_GROUP_SIZE is the exact number of stmts in the chain.
-                    Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
-                    that there is no interleaving, DR_GROUP_SIZE is 1,
-                    and only one iteration of the loop will be executed.  */
-                 op = vect_get_store_rhs (next_stmt_info);
-                 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
-                                                op, gvec_oprnds[i]);
-                 vec_oprnd = (*gvec_oprnds[i])[0];
-                 dr_chain.quick_push (vec_oprnd);
-                 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
-               }
+             /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
+                DR_CHAIN is of size 1.  */
+             gcc_assert (group_size == 1);
+             op = vect_get_store_rhs (first_stmt_info);
+             vect_get_vec_defs_for_operand (vinfo, first_stmt_info, ncopies,
+                                            op, gvec_oprnds[0]);
+             vec_oprnd = (*gvec_oprnds[0])[0];
+             dr_chain.quick_push (vec_oprnd);
              if (mask)
                {
                  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
@@ -8965,91 +8944,55 @@ vectorizable_store (vec_info *vinfo,
                                                 mask_vectype);
                  vec_mask = vec_masks[0];
                }
-           }
 
-         /* We should have catched mismatched types earlier.  */
-         gcc_assert (useless_type_conversion_p (vectype,
-                                                TREE_TYPE (vec_oprnd)));
-         bool simd_lane_access_p
-           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
-         if (simd_lane_access_p
-             && !loop_masks
-             && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
-             && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
-             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
-             && integer_zerop (DR_INIT (first_dr_info->dr))
-             && alias_sets_conflict_p (get_alias_set (aggr_type),
-                                       get_alias_set (TREE_TYPE (ref_type))))
-           {
-             dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
-             dataref_offset = build_int_cst (ref_type, 0);
+             /* We should have catched mismatched types earlier.  */
+             gcc_assert (useless_type_conversion_p (vectype,
+                                                    TREE_TYPE (vec_oprnd)));
+             if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+               vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
+                                            slp_node, &gs_info, &dataref_ptr,
+                                            &vec_offsets);
+             else
+               dataref_ptr
+                 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                             NULL, offset, &dummy, gsi,
+                                             &ptr_incr, false, bump);
            }
-         else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, slp_node,
-                                        &gs_info, &dataref_ptr, &vec_offsets);
          else
-           dataref_ptr
-             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-                                         simd_lane_access_p ? loop : NULL,
-                                         offset, &dummy, gsi, &ptr_incr,
-                                         simd_lane_access_p, bump);
-       }
-      else
-       {
-         gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
-         /* DR_CHAIN is then used as an input to vect_permute_store_chain().
-            If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
-            of size 1.  */
-         for (i = 0; i < group_size; i++)
            {
-             vec_oprnd = (*gvec_oprnds[i])[j];
-             dr_chain[i] = vec_oprnd;
+             gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+             vec_oprnd = (*gvec_oprnds[0])[j];
+             dr_chain[0] = vec_oprnd;
+             if (mask)
+               vec_mask = vec_masks[j];
+             if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+                                              gsi, stmt_info, bump);
            }
-         if (mask)
-           vec_mask = vec_masks[j];
-         if (dataref_offset)
-           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
-         else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
-                                          stmt_info, bump);
-       }
-
-      new_stmt = NULL;
-      if (grouped_store)
-       /* Permute.  */
-       vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, gsi,
-                                 &result_chain);
 
-      stmt_vec_info next_stmt_info = first_stmt_info;
-      for (i = 0; i < vec_num; i++)
-       {
-         unsigned misalign;
+         new_stmt = NULL;
          unsigned HOST_WIDE_INT align;
-
          tree final_mask = NULL_TREE;
          tree final_len = NULL_TREE;
          tree bias = NULL_TREE;
          if (loop_masks)
            final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
-                                            vec_num * ncopies, vectype,
-                                            vec_num * j + i);
+                                            ncopies, vectype, j);
          if (vec_mask)
            final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
                                           vec_mask, gsi);
 
-         if (memory_access_type == VMAT_GATHER_SCATTER
-             && gs_info.ifn != IFN_LAST)
+         if (gs_info.ifn != IFN_LAST)
            {
              if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-               vec_offset = vec_offsets[vec_num * j + i];
+               vec_offset = vec_offsets[j];
              tree scale = size_int (gs_info.scale);
 
              if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
                {
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                                  vec_num * ncopies, vectype,
-                                                  vec_num * j + i, 1);
+                                                  ncopies, vectype, j, 1);
                  else
                    final_len = build_int_cst (sizetype,
                                               TYPE_VECTOR_SUBPARTS (vectype));
@@ -9081,9 +9024,8 @@ vectorizable_store (vec_info *vinfo,
              gimple_call_set_nothrow (call, true);
              vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
              new_stmt = call;
-             break;
            }
-         else if (memory_access_type == VMAT_GATHER_SCATTER)
+         else
            {
              /* Emulated scatter.  */
              gcc_assert (!final_mask);
@@ -9132,8 +9074,126 @@ vectorizable_store (vec_info *vinfo,
                  new_stmt = gimple_build_assign (ref, elt);
                  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                }
-             break;
            }
+         if (j == 0)
+           *vec_stmt = new_stmt;
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+       }
+      return true;
+    }
+
+  auto_vec<tree> result_chain (group_size);
+  auto_vec<tree, 1> vec_oprnds;
+  for (j = 0; j < ncopies; j++)
+    {
+      gimple *new_stmt;
+      if (j == 0)
+       {
+         if (slp)
+           {
+             /* Get vectorized arguments for SLP_NODE.  */
+             vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
+                                &vec_oprnds);
+             vec_oprnd = vec_oprnds[0];
+           }
+         else
+           {
+             /* For interleaved stores we collect vectorized defs for all the
+                stores in the group in DR_CHAIN. DR_CHAIN is then used as an
+                input to vect_permute_store_chain().
+
+                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
+                is of size 1.  */
+             stmt_vec_info next_stmt_info = first_stmt_info;
+             for (i = 0; i < group_size; i++)
+               {
+                 /* Since gaps are not supported for interleaved stores,
+                    DR_GROUP_SIZE is the exact number of stmts in the chain.
+                    Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
+                    that there is no interleaving, DR_GROUP_SIZE is 1,
+                    and only one iteration of the loop will be executed.  */
+                 op = vect_get_store_rhs (next_stmt_info);
+                 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
+                                                op, gvec_oprnds[i]);
+                 vec_oprnd = (*gvec_oprnds[i])[0];
+                 dr_chain.quick_push (vec_oprnd);
+                 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
+               }
+             if (mask)
+               {
+                 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                                mask, &vec_masks,
+                                                mask_vectype);
+                 vec_mask = vec_masks[0];
+               }
+           }
+
+         /* We should have catched mismatched types earlier.  */
+         gcc_assert (useless_type_conversion_p (vectype,
+                                                TREE_TYPE (vec_oprnd)));
+         bool simd_lane_access_p
+           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
+         if (simd_lane_access_p
+             && !loop_masks
+             && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
+             && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
+             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
+             && integer_zerop (DR_INIT (first_dr_info->dr))
+             && alias_sets_conflict_p (get_alias_set (aggr_type),
+                                       get_alias_set (TREE_TYPE (ref_type))))
+           {
+             dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
+             dataref_offset = build_int_cst (ref_type, 0);
+           }
+         else
+           dataref_ptr
+             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                         simd_lane_access_p ? loop : NULL,
+                                         offset, &dummy, gsi, &ptr_incr,
+                                         simd_lane_access_p, bump);
+       }
+      else
+       {
+         gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+         /* DR_CHAIN is then used as an input to vect_permute_store_chain().
+            If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
+            of size 1.  */
+         for (i = 0; i < group_size; i++)
+           {
+             vec_oprnd = (*gvec_oprnds[i])[j];
+             dr_chain[i] = vec_oprnd;
+           }
+         if (mask)
+           vec_mask = vec_masks[j];
+         if (dataref_offset)
+           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+         else
+           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+                                          stmt_info, bump);
+       }
+
+      new_stmt = NULL;
+      if (grouped_store)
+       /* Permute.  */
+       vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, gsi,
+                                 &result_chain);
+
+      stmt_vec_info next_stmt_info = first_stmt_info;
+      for (i = 0; i < vec_num; i++)
+       {
+         unsigned misalign;
+         unsigned HOST_WIDE_INT align;
+
+         tree final_mask = NULL_TREE;
+         tree final_len = NULL_TREE;
+         tree bias = NULL_TREE;
+         if (loop_masks)
+           final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+                                            vec_num * ncopies, vectype,
+                                            vec_num * j + i);
+         if (vec_mask)
+           final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
+                                          vec_mask, gsi);
 
          if (i > 0)
            /* Bump the vector pointer.  */