Vect: Add support for dot-product where the sign for the multiplicand changes.

author Tamar Christina <tamar.christina@arm.com>

Wed, 14 Jul 2021 13:54:26 +0000 (14:54 +0100)

committer Tamar Christina <tamar.christina@arm.com>

Wed, 14 Jul 2021 13:54:26 +0000 (14:54 +0100)
author Tamar Christina <tamar.christina@arm.com>
Wed, 14 Jul 2021 13:54:26 +0000 (14:54 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Wed, 14 Jul 2021 13:54:26 +0000 (14:54 +0100)
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi

index 8225a768a5ccd1f2e710fc03df200da4915013a8..07681e2ad2926cbf9bae4f7463c37d2830c42dee 100644 (file)
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5449,13 +5449,53 @@ Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
  
  @cindex @code{sdot_prod@var{m}} instruction pattern
  @item @samp{sdot_prod@var{m}}
+
+Compute the sum of the products of two signed elements.
+Operand 1 and operand 2 are of the same mode. Their
+product, which is of a wider mode, is computed and added to operand 3.
+Operand 3 is of a mode equal or wider than the mode of the product. The
+result is placed in operand 0, which is of the same mode as operand 3.
+
+Semantically, the expression performs the multiplication with the following signs:
+
+@smallexample
+sdot<signed op0, signed op1, signed op2, signed op3> ==
+   op0 = sign-ext (op1) * sign-ext (op2) + op3
+@dots{}
+@end smallexample
+
  @cindex @code{udot_prod@var{m}} instruction pattern
-@itemx @samp{udot_prod@var{m}}
-Compute the sum of the products of two signed/unsigned elements.
-Operand 1 and operand 2 are of the same mode. Their product, which is of a
-wider mode, is computed and added to operand 3. Operand 3 is of a mode equal or
-wider than the mode of the product. The result is placed in operand 0, which
-is of the same mode as operand 3.
+@item @samp{udot_prod@var{m}}
+
+Compute the sum of the products of two unsigned elements.
+Operand 1 and operand 2 are of the same mode. Their
+product, which is of a wider mode, is computed and added to operand 3.
+Operand 3 is of a mode equal or wider than the mode of the product. The
+result is placed in operand 0, which is of the same mode as operand 3.
+
+Semantically, the expression performs the multiplication with the following signs:
+
+@smallexample
+udot<unsigned op0, unsigned op1, unsigned op2, unsigned op3> ==
+   op0 = zero-ext (op1) * zero-ext (op2) + op3
+@dots{}
+@end smallexample
+
+@cindex @code{usdot_prod@var{m}} instruction pattern
+@item @samp{usdot_prod@var{m}}
+Compute the sum of the products of elements of different signs.
+Operand 1 must be unsigned and operand 2 signed. Their
+product, which is of a wider mode, is computed and added to operand 3.
+Operand 3 is of a mode equal or wider than the mode of the product. The
+result is placed in operand 0, which is of the same mode as operand 3.
+
+Semantically, the expression performs the multiplication with the following signs:
+
+@smallexample
+usdot<signed op0, unsigned op1, signed op2, signed op3> ==
+   op0 = ((signed-conv) zero-ext (op1)) * sign-ext (op2) + op3
+@dots{}
+@end smallexample
  
  @cindex @code{ssad@var{m}} instruction pattern
  @item @samp{ssad@var{m}}
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c

index 95ffe397c23e80c105afea52e9d47216bf52f55a..eeb5aeed3202cc6971b6447994bc5311e9c010bb 100644 (file)
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -127,7 +127,12 @@ optab_for_tree_code (enum tree_code code, const_tree type,
        return TYPE_UNSIGNED (type) ? usum_widen_optab : ssum_widen_optab;
  
      case DOT_PROD_EXPR:
-      return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab;
+      {
+       if (subtype == optab_vector_mixed_sign)
+         return usdot_prod_optab;
+
+       return (TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab);
+      }
  
      case SAD_EXPR:
        return TYPE_UNSIGNED (type) ? usad_optab : ssad_optab;
diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h

index c3aaa1a416991e856d3e24da45968a92ebada82c..fbd2b06b8dbfd560dfb66b314830e6b564b37abb 100644 (file)
--- a/gcc/optabs-tree.h
+++ b/gcc/optabs-tree.h
@@ -29,7 +29,8 @@ enum optab_subtype
  {
    optab_default,
    optab_scalar,
-  optab_vector
+  optab_vector,
+  optab_vector_mixed_sign
  };
  
  /* Return the optab used for computing the given operation on the type given by
diff --git a/gcc/optabs.c b/gcc/optabs.c

index 62a6bdb4c59bf8263c499245795576199606d372..14d8ad2f33fd75388435fe912380e177f8f3c54b 100644 (file)
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -262,6 +262,11 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
    bool sbool = false;
  
    oprnd0 = ops->op0;
+  if (nops >= 2)
+    oprnd1 = ops->op1;
+  if (nops >= 3)
+    oprnd2 = ops->op2;
+
    tmode0 = TYPE_MODE (TREE_TYPE (oprnd0));
    if (ops->code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
        || ops->code == VEC_UNPACK_FIX_TRUNC_LO_EXPR)
@@ -285,6 +290,27 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
            ? vec_unpacks_sbool_hi_optab : vec_unpacks_sbool_lo_optab);
        sbool = true;
      }
+  else if (ops->code == DOT_PROD_EXPR)
+    {
+      enum optab_subtype subtype = optab_default;
+      signop sign1 = TYPE_SIGN (TREE_TYPE (oprnd0));
+      signop sign2 = TYPE_SIGN (TREE_TYPE (oprnd1));
+      if (sign1 == sign2)
+       ;
+      else if (sign1 == SIGNED && sign2 == UNSIGNED)
+       {
+         subtype = optab_vector_mixed_sign;
+         /* Same as optab_vector_mixed_sign but flip the operands.  */
+         std::swap (op0, op1);
+       }
+      else if (sign1 == UNSIGNED && sign2 == SIGNED)
+       subtype = optab_vector_mixed_sign;
+      else
+       gcc_unreachable ();
+
+      widen_pattern_optab
+       = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), subtype);
+    }
    else
      widen_pattern_optab
        = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
@@ -298,10 +324,7 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
    gcc_assert (icode != CODE_FOR_nothing);
  
    if (nops >= 2)
-    {
-      oprnd1 = ops->op1;
-      tmode1 = TYPE_MODE (TREE_TYPE (oprnd1));
-    }
+    tmode1 = TYPE_MODE (TREE_TYPE (oprnd1));
    else if (sbool)
      {
        nops = 2;
@@ -316,7 +339,6 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
      {
        gcc_assert (tmode1 == tmode0);
        gcc_assert (op1);
-      oprnd2 = ops->op2;
        wmode = TYPE_MODE (TREE_TYPE (oprnd2));
      }
  
diff --git a/gcc/optabs.def b/gcc/optabs.def

index 51acc1be8f55fa46ee8be6f1577bdfb5eda1440e..201b8aae1c03d4e63628a43bce9cf5d25ac0e912 100644 (file)
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -352,6 +352,7 @@ OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
  OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
  OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
  OPTAB_D (udot_prod_optab, "udot_prod$I$a")
+OPTAB_D (usdot_prod_optab, "usdot_prod$I$a")
  OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
  OPTAB_D (usad_optab, "usad$I$a")
  OPTAB_D (ssad_optab, "ssad$I$a")
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c

index 1f0f4a2c6eb2c8229d46a3881c658046d67eeaf1..28208477b6a25f4bd38ede74884aa8a57c2e3566 100644 (file)
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4438,7 +4438,8 @@ verify_gimple_assign_ternary (gassign *stmt)
                   && !SCALAR_FLOAT_TYPE_P (rhs1_type))
                  || (!INTEGRAL_TYPE_P (lhs_type)
                      && !SCALAR_FLOAT_TYPE_P (lhs_type))))
-           || !types_compatible_p (rhs1_type, rhs2_type)
+           /* rhs1_type and rhs2_type may differ in sign.  */
+           || !tree_nop_conversion_p (rhs1_type, rhs2_type)
             || !useless_type_conversion_p (lhs_type, rhs3_type)
             || maybe_lt (GET_MODE_SIZE (element_mode (rhs3_type)),
                          2 * GET_MODE_SIZE (element_mode (rhs1_type))))
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c

index e9780158a51d22c10aeb393abbb3f0246a9576b7..fc3dab0d143a18572ebec197893fdf82bffbcabc 100644 (file)
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6661,6 +6661,12 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
    bool lane_reduc_code_p
      = (code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR);
    int op_type = TREE_CODE_LENGTH (code);
+  enum optab_subtype optab_query_kind = optab_vector;
+  if (code == DOT_PROD_EXPR
+      && TYPE_SIGN (TREE_TYPE (gimple_assign_rhs1 (stmt)))
+          != TYPE_SIGN (TREE_TYPE (gimple_assign_rhs2 (stmt))))
+    optab_query_kind = optab_vector_mixed_sign;
+
  
    scalar_dest = gimple_assign_lhs (stmt);
    scalar_type = TREE_TYPE (scalar_dest);
@@ -7189,7 +7195,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
        bool ok = true;
  
        /* 4.1. check support for the operation in the loop  */
-      optab optab = optab_for_tree_code (code, vectype_in, optab_vector);
+      optab optab = optab_for_tree_code (code, vectype_in, optab_query_kind);
        if (!optab)
         {
           if (dump_enabled_p ())
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c

index b2e7fc2cc7adad72697b8d76deb0448d0b03e0a8..71533e61c934c63dd05a33c8f7159185e9b11a1b 100644 (file)
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -191,9 +191,9 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
  }
  
  /* Return true if the target supports a vector version of CODE,
-   where CODE is known to map to a direct optab.  ITYPE specifies
-   the type of (some of) the scalar inputs and OTYPE specifies the
-   type of the scalar result.
+   where CODE is known to map to a direct optab with the given SUBTYPE.
+   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
+   specifies the type of the scalar result.
  
     If CODE allows the inputs and outputs to have different type
     (such as for WIDEN_SUM_EXPR), it is the input mode rather
@@ -208,7 +208,8 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
  static bool
  vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
                                  tree itype, tree *vecotype_out,
-                                tree *vecitype_out = NULL)
+                                tree *vecitype_out = NULL,
+                                enum optab_subtype subtype = optab_default)
  {
    tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    if (!vecitype)
@@ -218,7 +219,7 @@ vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
    if (!vecotype)
      return false;
  
-  optab optab = optab_for_tree_code (code, vecitype, optab_default);
+  optab optab = optab_for_tree_code (code, vecitype, subtype);
    if (!optab)
      return false;
  
@@ -521,6 +522,7 @@ vect_joust_widened_type (tree type, tree new_type, tree *common_type)
    unsigned int precision = MAX (TYPE_PRECISION (*common_type),
                                 TYPE_PRECISION (new_type));
    precision *= 2;
+
    if (precision * 2 > TYPE_PRECISION (type))
      return false;
  
@@ -539,6 +541,10 @@ vect_joust_widened_type (tree type, tree new_type, tree *common_type)
     to a type that (a) is narrower than the result of STMT_INFO and
     (b) can hold all leaf operand values.
  
+   If SUBTYPE is nonnull, the operands may differ in sign but not in
+   precision; when no single common type fits them, *SUBTYPE is set to
+   optab_vector_mixed_sign instead of failing.
+
     Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
     exists.  */
  
@@ -546,7 +552,8 @@ static unsigned int
  vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
                       tree_code widened_code, bool shift_p,
                       unsigned int max_nops,
-                     vect_unpromoted_value *unprom, tree *common_type)
+                     vect_unpromoted_value *unprom, tree *common_type,
+                     enum optab_subtype *subtype = NULL)
  {
    /* Check for an integer operation with the right code.  */
    gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
@@ -607,7 +614,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
                 = vinfo->lookup_def (this_unprom->op);
               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
                                            widened_code, shift_p, max_nops,
-                                          this_unprom, common_type);
+                                          this_unprom, common_type,
+                                          subtype);
               if (nops == 0)
                 return 0;
  
@@ -625,7 +633,18 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
                 *common_type = this_unprom->type;
               else if (!vect_joust_widened_type (type, this_unprom->type,
                                                  common_type))
-               return 0;
+               {
+                 if (subtype)
+                   {
+                     /* See if we can sign extend the smaller type.  */
+                     if (TYPE_PRECISION (this_unprom->type)
+                         > TYPE_PRECISION (*common_type))
+                       *common_type = this_unprom->type;
+                     *subtype = optab_vector_mixed_sign;
+                   }
+                 else
+                   return 0;
+               }
             }
         }
        next_op += nops;
@@ -725,12 +744,22 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
  
  /* Convert UNPROM to TYPE and return the result, adding new statements
     to STMT_INFO's pattern definition statements if no better way is
-   available.  VECTYPE is the vector form of TYPE.  */
+   available.  VECTYPE is the vector form of TYPE.
+
+   If SUBTYPE is optab_vector_mixed_sign, keep the sign of UNPROM's type.  */
  
  static tree
  vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
-                   vect_unpromoted_value *unprom, tree vectype)
+                   vect_unpromoted_value *unprom, tree vectype,
+                   enum optab_subtype subtype = optab_default)
  {
+
+  /* Update the type if the signs differ.  */
+  if (subtype == optab_vector_mixed_sign
+      && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
+    type = build_nonstandard_integer_type (TYPE_PRECISION (type),
+                                          TYPE_SIGN (unprom->type));
+
    /* Check for a no-op conversion.  */
    if (types_compatible_p (type, TREE_TYPE (unprom->op)))
      return unprom->op;
@@ -806,12 +835,14 @@ vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
  }
  
  /* Invoke vect_convert_input for N elements of UNPROM and store the
-   result in the corresponding elements of RESULT.  */
+   result in the corresponding elements of RESULT.
+
+   SUBTYPE is passed through to vect_convert_input for each element.  */
  
  static void
  vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
                      tree *result, tree type, vect_unpromoted_value *unprom,
-                    tree vectype)
+                    tree vectype, enum optab_subtype subtype = optab_default)
  {
    for (unsigned int i = 0; i < n; ++i)
      {
@@ -819,11 +850,12 @@ vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
        for (j = 0; j < i; ++j)
         if (unprom[j].op == unprom[i].op)
           break;
+
        if (j < i)
         result[i] = result[j];
        else
         result[i] = vect_convert_input (vinfo, stmt_info,
-                                       type, &unprom[i], vectype);
+                                       type, &unprom[i], vectype, subtype);
      }
  }
  
@@ -895,7 +927,8 @@ vect_reassociating_reduction_p (vec_info *vinfo,
  
     Try to find the following pattern:
  
-     type x_t, y_t;
+     type1a x_t;
+     type1b y_t;
       TYPE1 prod;
       TYPE2 sum = init;
     loop:
@@ -908,9 +941,9 @@ vect_reassociating_reduction_p (vec_info *vinfo,
       [S6  prod = (TYPE2) prod;  #optional]
       S7  sum_1 = prod + sum_0;
  
-   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
-   same size of 'TYPE1' or bigger. This is a special case of a reduction
-   computation.
+   where 'TYPE1' is exactly double the size of 'type1a' and 'type1b'.
+   The sign of 'TYPE1' must match that of 'type1a' or 'type1b', but the
+   signs of 'type1a' and 'type1b' can differ.
  
     Input:
  
@@ -953,7 +986,8 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
       In which
       - DX is double the size of X
       - DY is double the size of Y
-     - DX, DY, DPROD all have the same type
+     - DX, DY, DPROD all have the same type, but the signs of
+       X, Y and DPROD can differ.
       - sum is the same size of DPROD or bigger
       - sum has been recognized as a reduction variable.
  
@@ -991,8 +1025,18 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
       inside the loop (in case we are analyzing an outer-loop).  */
    vect_unpromoted_value unprom0[2];
+  enum optab_subtype subtype = optab_vector;
    if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
-                            false, 2, unprom0, &half_type))
+                            false, 2, unprom0, &half_type, &subtype))
+    return NULL;
+
+  /* If there are two widening operations, make sure they agree on the sign
+     of the extension.  The result of an optab_vector_mixed_sign operation
+     is signed; otherwise, the result has the same sign as the operands.  */
+  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
+      && (subtype == optab_vector_mixed_sign
+       ? TYPE_UNSIGNED (unprom_mult.type)
+       : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
      return NULL;
  
    /* If there are two widening operations, make sure they agree on
@@ -1005,13 +1049,13 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
  
    tree half_vectype;
    if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
-                                       type_out, &half_vectype))
+                                       type_out, &half_vectype, subtype))
      return NULL;
  
    /* Get the inputs in the appropriate types.  */
    tree mult_oprnd[2];
    vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
-                      unprom0, half_vectype);
+                      unprom0, half_vectype, subtype);
  
    var = vect_recog_temp_ssa_var (type, NULL);
    pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
author	Tamar Christina <tamar.christina@arm.com>
	Wed, 14 Jul 2021 13:54:26 +0000 (14:54 +0100)
committer	Tamar Christina <tamar.christina@arm.com>
	Wed, 14 Jul 2021 13:54:26 +0000 (14:54 +0100)
gcc/doc/md.texi		patch \| blob \| blame \| history
gcc/optabs-tree.c		patch \| blob \| blame \| history
gcc/optabs-tree.h		patch \| blob \| blame \| history
gcc/optabs.c		patch \| blob \| blame \| history
gcc/optabs.def		patch \| blob \| blame \| history
gcc/tree-cfg.c		patch \| blob \| blame \| history
gcc/tree-vect-loop.c		patch \| blob \| blame \| history
gcc/tree-vect-patterns.c		patch \| blob \| blame \| history