git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/108752 - vectorize emulated vectors in lowered form
authorRichard Biener <rguenther@suse.de>
Fri, 10 Feb 2023 12:09:10 +0000 (13:09 +0100)
committerRichard Biener <rguenther@suse.de>
Fri, 28 Apr 2023 09:05:53 +0000 (11:05 +0200)
The following makes sure to emit operations lowered to bit operations
when vectorizing using emulated vectors.  This avoids relying on
the vector lowering pass adhering to the exact same cost considerations
as the vectorizer.

PR tree-optimization/108752
* tree-vect-generic.cc (build_replicated_const): Rename
to build_replicated_int_cst and move to tree.{h,cc}.
(do_plus_minus): Adjust.
(do_negate): Likewise.
* tree-vect-stmts.cc (vectorizable_operation): Emit emulated
arithmetic vector operations in lowered form.
* tree.h (build_replicated_int_cst): Declare.
* tree.cc (build_replicated_int_cst): Moved from
tree-vect-generic.cc build_replicated_const.

gcc/tree-vect-generic.cc
gcc/tree-vect-stmts.cc
gcc/tree.cc
gcc/tree.h

index 445da53292e9d1d2db62ca962fc017bb0e6c9bbe..59115b2e1629358e85cb770f6da04cc5a2adb27a 100644 (file)
@@ -103,35 +103,6 @@ subparts_gt (tree type1, tree type2)
   return known_gt (n1, n2);
 }
 
-/* Build a constant of type TYPE, made of VALUE's bits replicated
-   every WIDTH bits to fit TYPE's precision.  */
-static tree
-build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
-{
-  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1) 
-    / HOST_BITS_PER_WIDE_INT;
-  unsigned HOST_WIDE_INT low, mask;
-  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
-  int i;
-
-  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
-
-  if (width == HOST_BITS_PER_WIDE_INT)
-    low = value;
-  else
-    {
-      mask = ((HOST_WIDE_INT)1 << width) - 1;
-      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
-    }
-
-  for (i = 0; i < n; i++)
-    a[i] = low;
-
-  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
-  return wide_int_to_tree
-    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
-}
-
 static GTY(()) tree vector_inner_type;
 static GTY(()) tree vector_last_type;
 static GTY(()) int vector_last_nunits;
@@ -255,8 +226,8 @@ do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
   tree low_bits, high_bits, a_low, b_low, result_low, signs;
 
   max = GET_MODE_MASK (TYPE_MODE (inner_type));
-  low_bits = build_replicated_const (word_type, width, max >> 1);
-  high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
+  low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+  high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
 
   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
@@ -289,8 +260,8 @@ do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
   tree low_bits, high_bits, b_low, result_low, signs;
 
   max = GET_MODE_MASK (TYPE_MODE (inner_type));
-  low_bits = build_replicated_const (word_type, width, max >> 1);
-  high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
+  low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+  high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
 
   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
 
index 272839a658cfb0db94ebc065dfcf1416484baeff..dc2dc2cfa7e94cfc6b9ef7679d9ca3d9a0b58f4c 100644 (file)
@@ -6134,7 +6134,6 @@ vectorizable_shift (vec_info *vinfo,
   return true;
 }
 
-
 /* Function vectorizable_operation.
 
    Check if STMT_INFO performs a binary, unary or ternary operation that can
@@ -6405,20 +6404,6 @@ vectorizable_operation (vec_info *vinfo,
       return false;
     }
 
-  /* ???  We should instead expand the operations here, instead of
-     relying on vector lowering which has this hard cap on the number
-     of vector elements below it performs elementwise operations.  */
-  if (using_emulated_vectors_p
-      && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
-      && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
-         || maybe_lt (nunits_out, 4U)))
-    {
-      if (dump_enabled_p ())
-       dump_printf (MSG_NOTE, "not using word mode for +- and less than "
-                    "four vector elements\n");
-      return false;
-    }
-
   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   internal_fn cond_fn = get_conditional_internal_fn (code);
@@ -6581,7 +6566,96 @@ vectorizable_operation (vec_info *vinfo,
       vop1 = ((op_type == binary_op || op_type == ternary_op)
              ? vec_oprnds1[i] : NULL_TREE);
       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
-      if (masked_loop_p && mask_out_inactive)
+      if (using_emulated_vectors_p
+         && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
+       {
+         /* Lower the operation.  This follows vector lowering.  */
+         unsigned int width = vector_element_bits (vectype);
+         tree inner_type = TREE_TYPE (vectype);
+         tree word_type
+           = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
+         HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
+         tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+         tree high_bits
+           = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
+         tree wvop0 = make_ssa_name (word_type);
+         new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
+                                         build1 (VIEW_CONVERT_EXPR,
+                                                 word_type, vop0));
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+         tree result_low, signs;
+         if (code == PLUS_EXPR || code == MINUS_EXPR)
+           {
+             tree wvop1 = make_ssa_name (word_type);
+             new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
+                                             build1 (VIEW_CONVERT_EXPR,
+                                                     word_type, vop1));
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             signs = make_ssa_name (word_type);
+             new_stmt = gimple_build_assign (signs,
+                                             BIT_XOR_EXPR, wvop0, wvop1);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             tree b_low = make_ssa_name (word_type);
+             new_stmt = gimple_build_assign (b_low,
+                                             BIT_AND_EXPR, wvop1, low_bits);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             tree a_low = make_ssa_name (word_type);
+             if (code == PLUS_EXPR)
+               new_stmt = gimple_build_assign (a_low,
+                                               BIT_AND_EXPR, wvop0, low_bits);
+             else
+               new_stmt = gimple_build_assign (a_low,
+                                               BIT_IOR_EXPR, wvop0, high_bits);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             if (code == MINUS_EXPR)
+               {
+                 new_stmt = gimple_build_assign (NULL_TREE,
+                                                 BIT_NOT_EXPR, signs);
+                 signs = make_ssa_name (word_type);
+                 gimple_assign_set_lhs (new_stmt, signs);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+               }
+             new_stmt = gimple_build_assign (NULL_TREE,
+                                             BIT_AND_EXPR, signs, high_bits);
+             signs = make_ssa_name (word_type);
+             gimple_assign_set_lhs (new_stmt, signs);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             result_low = make_ssa_name (word_type);
+             new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+           }
+         else
+           {
+             tree a_low = make_ssa_name (word_type);
+             new_stmt = gimple_build_assign (a_low,
+                                             BIT_AND_EXPR, wvop0, low_bits);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             signs = make_ssa_name (word_type);
+             new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             new_stmt = gimple_build_assign (NULL_TREE,
+                                             BIT_AND_EXPR, signs, high_bits);
+             signs = make_ssa_name (word_type);
+             gimple_assign_set_lhs (new_stmt, signs);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             result_low = make_ssa_name (word_type);
+             new_stmt = gimple_build_assign (result_low,
+                                             MINUS_EXPR, high_bits, a_low);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+           }
+         new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
+                                         signs);
+         result_low = make_ssa_name (word_type);
+         gimple_assign_set_lhs (new_stmt, result_low);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+         new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
+                                         build1 (VIEW_CONVERT_EXPR,
+                                                 vectype, result_low));
+         result_low = make_ssa_name (vectype);
+         gimple_assign_set_lhs (new_stmt, result_low);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+       }
+      else if (masked_loop_p && mask_out_inactive)
        {
          tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
                                          vectype, i);
index ead4248b8e5c449185922273ebf8c9db3aa35076..7e6de2888861e68352ad884e0014fa15d08f7c7d 100644 (file)
@@ -2667,6 +2667,36 @@ build_zero_cst (tree type)
     }
 }
 
+/* Build a constant of integer type TYPE, made of VALUE's bits replicated
+   every WIDTH bits to fit TYPE's precision.  */
+
+tree
+build_replicated_int_cst (tree type, unsigned int width, HOST_WIDE_INT value)
+{
+  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
+    / HOST_BITS_PER_WIDE_INT;
+  unsigned HOST_WIDE_INT low, mask;
+  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
+  int i;
+
+  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
+
+  if (width == HOST_BITS_PER_WIDE_INT)
+    low = value;
+  else
+    {
+      mask = ((HOST_WIDE_INT)1 << width) - 1;
+      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
+    }
+
+  for (i = 0; i < n; i++)
+    a[i] = low;
+
+  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
+  return wide_int_to_tree
+    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
+}
+
 /* If floating-point type TYPE has an IEEE-style sign bit, return an
    unsigned constant in which only the sign bit is set.  Return null
    otherwise.  */
index dc94c17db76269faf44e14b3206feb4b23156014..0b72663e6a1a94406127f6253460f498b7a3ea9c 100644 (file)
@@ -4685,6 +4685,7 @@ extern tree build_one_cst (tree);
 extern tree build_minus_one_cst (tree);
 extern tree build_all_ones_cst (tree);
 extern tree build_zero_cst (tree);
+extern tree build_replicated_int_cst (tree, unsigned, HOST_WIDE_INT);
 extern tree sign_mask_for (tree);
 extern tree build_string (unsigned, const char * = NULL);
 extern tree build_poly_int_cst (tree, const poly_wide_int_ref &);