Use intermiediate integer type for float_expr/fix_trunc_expr when direct optab is...

author liuhongt <hongtao.liu@intel.com>

Wed, 31 May 2023 03:20:46 +0000 (11:20 +0800)

committer liuhongt <hongtao.liu@intel.com>

Wed, 21 Jun 2023 02:16:07 +0000 (10:16 +0800)
author liuhongt <hongtao.liu@intel.com>
Wed, 31 May 2023 03:20:46 +0000 (11:20 +0800)
committer liuhongt <hongtao.liu@intel.com>
Wed, 21 Jun 2023 02:16:07 +0000 (10:16 +0800)
diff --git a/gcc/testsuite/gcc.target/i386/pr110018-1.c b/gcc/testsuite/gcc.target/i386/pr110018-1.c

new file mode 100644 (file)

index 0000000..b1baffd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110018-1.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -O2 -mavx512dq" } */
+/* { dg-final { scan-assembler-times {(?n)vcvttp[dsh]2[dqw]} 5 } } */
+/* { dg-final { scan-assembler-times {(?n)vcvt[dqw]*2p[dsh]} 5 } } */
+
+void
+foo (double* __restrict a, char* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+}
+
+void
+foo1 (float* __restrict a, char* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+}
+
+void
+foo2 (_Float16* __restrict a, char* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+  a[4] = b[4];
+  a[5] = b[5];
+  a[6] = b[6];
+  a[7] = b[7];
+}
+
+void
+foo3 (double* __restrict a, short* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+}
+
+void
+foo4 (float* __restrict a, char* b)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+}
+
+void
+foo5 (double* __restrict b, char* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+}
+
+void
+foo6 (float* __restrict b, char* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+}
+
+void
+foo7 (_Float16* __restrict b, char* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+  a[4] = b[4];
+  a[5] = b[5];
+  a[6] = b[6];
+  a[7] = b[7];
+}
+
+void
+foo8 (double* __restrict b, short* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+}
+
+void
+foo9 (float* __restrict b, char* a)
+{
+  a[0] = b[0];
+  a[1] = b[1];
+  a[2] = b[2];
+  a[3] = b[3];
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc

index 056a0ecb2be151dfdd65824d111e320f3b351b3c..ae24f3e66e63d9bd9763284a47fb2c911335c4c1 100644 (file)
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5041,7 +5041,7 @@ vectorizable_conversion (vec_info *vinfo,
    tree scalar_dest;
    tree op0, op1 = NULL_TREE;
    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
-  tree_code tc1;
+  tree_code tc1, tc2;
    code_helper code, code1, code2;
    code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
    tree new_temp;
@@ -5249,6 +5249,57 @@ vectorizable_conversion (vec_info *vinfo,
         code1 = tc1;
         break;
        }
+
+      /* For conversions between float and smaller integer types try whether we
+        can use intermediate signed integer types to support the
+        conversion.  */
+      if ((code == FLOAT_EXPR
+          && GET_MODE_SIZE (lhs_mode) > GET_MODE_SIZE (rhs_mode))
+         || (code == FIX_TRUNC_EXPR
+             && GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode)))
+       {
+         bool float_expr_p = code == FLOAT_EXPR;
+         scalar_mode imode = float_expr_p ? rhs_mode : lhs_mode;
+         fltsz = GET_MODE_SIZE (float_expr_p ? lhs_mode : rhs_mode);
+         code1 = float_expr_p ? code : NOP_EXPR;
+         codecvt1 = float_expr_p ? NOP_EXPR : code;
+         FOR_EACH_2XWIDER_MODE (rhs_mode_iter, imode)
+           {
+             imode = rhs_mode_iter.require ();
+             if (GET_MODE_SIZE (imode) > fltsz)
+               break;
+
+             cvt_type
+               = build_nonstandard_integer_type (GET_MODE_BITSIZE (imode),
+                                                 0);
+             cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
+                                                     slp_node);
+             /* This should only happened for SLP as long as loop vectorizer
+                only supports same-sized vector.  */
+             if (cvt_type == NULL_TREE
+                 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
+                 || !supportable_convert_operation ((tree_code) code1,
+                                                    vectype_out,
+                                                    cvt_type, &tc1)
+                 || !supportable_convert_operation ((tree_code) codecvt1,
+                                                    cvt_type,
+                                                    vectype_in, &tc2))
+               continue;
+
+             found_mode = true;
+             break;
+           }
+
+         if (found_mode)
+           {
+             multi_step_cvt++;
+             interm_types.safe_push (cvt_type);
+             cvt_type = NULL_TREE;
+             code1 = tc1;
+             codecvt1 = tc2;
+             break;
+           }
+       }
        /* FALLTHRU */
      unsupported:
        if (dump_enabled_p ())
@@ -5513,7 +5564,18 @@ vectorizable_conversion (vec_info *vinfo,
        FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
         {
           /* Arguments are ready, create the new vector stmt.  */
-         gimple *new_stmt = vect_gimple_build (vec_dest, code1, vop0);
+         gimple* new_stmt;
+         if (multi_step_cvt)
+           {
+             gcc_assert (multi_step_cvt == 1);
+             new_stmt = vect_gimple_build (vec_dest, codecvt1, vop0);
+             new_temp = make_ssa_name (vec_dest, new_stmt);
+             gimple_assign_set_lhs (new_stmt, new_temp);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             vop0 = new_temp;
+             vec_dest = vec_dsts[0];
+           }
+         new_stmt = vect_gimple_build (vec_dest, code1, vop0);
           new_temp = make_ssa_name (vec_dest, new_stmt);
           gimple_set_lhs (new_stmt, new_temp);
           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
author	liuhongt <hongtao.liu@intel.com>
	Wed, 31 May 2023 03:20:46 +0000 (11:20 +0800)
committer	liuhongt <hongtao.liu@intel.com>
	Wed, 21 Jun 2023 02:16:07 +0000 (10:16 +0800)
gcc/testsuite/gcc.target/i386/pr110018-1.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-stmts.cc		patch \| blob \| blame \| history