Add internal functions for iround etc. [PR106253]

author Richard Sandiford <richard.sandiford@arm.com>
Tue, 12 Jul 2022 13:09:44 +0000 (14:09 +0100)

committer Richard Sandiford <richard.sandiford@arm.com>
Tue, 12 Jul 2022 13:09:44 +0000 (14:09 +0100)
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc

index a486321e10fcdb027771011f30c467c6e3c7a5ec..adfddb8b215731511752206b7c93902573963ed2 100644 (file)
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2555,89 +2555,6 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
    gcc_unreachable ();
  }
  
-tree
-aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
-                                    tree type_in)
-{
-  machine_mode in_mode, out_mode;
-
-  if (TREE_CODE (type_out) != VECTOR_TYPE
-      || TREE_CODE (type_in) != VECTOR_TYPE)
-    return NULL_TREE;
-
-  out_mode = TYPE_MODE (type_out);
-  in_mode = TYPE_MODE (type_in);
-
-#undef AARCH64_CHECK_BUILTIN_MODE
-#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
-#define AARCH64_FIND_FRINT_VARIANT(N) \
-  (AARCH64_CHECK_BUILTIN_MODE (2, D) \
-    ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
-    : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
-       ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
-       : (AARCH64_CHECK_BUILTIN_MODE (2, S) \
-          ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
-          : NULL_TREE)))
-  switch (fn)
-    {
-#undef AARCH64_CHECK_BUILTIN_MODE
-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
-  (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode)
-    CASE_CFN_IFLOOR:
-    CASE_CFN_LFLOOR:
-    CASE_CFN_LLFLOOR:
-      {
-       enum aarch64_builtins builtin;
-       if (AARCH64_CHECK_BUILTIN_MODE (2, D))
-         builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
-       else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
-         builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
-       else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
-         builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
-       else
-         return NULL_TREE;
-
-       return aarch64_builtin_decls[builtin];
-      }
-    CASE_CFN_ICEIL:
-    CASE_CFN_LCEIL:
-    CASE_CFN_LLCEIL:
-      {
-       enum aarch64_builtins builtin;
-       if (AARCH64_CHECK_BUILTIN_MODE (2, D))
-         builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
-       else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
-         builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
-       else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
-         builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
-       else
-         return NULL_TREE;
-
-       return aarch64_builtin_decls[builtin];
-      }
-    CASE_CFN_IROUND:
-    CASE_CFN_LROUND:
-    CASE_CFN_LLROUND:
-      {
-       enum aarch64_builtins builtin;
-       if (AARCH64_CHECK_BUILTIN_MODE (2, D))
-         builtin =     AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
-       else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
-         builtin =     AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
-       else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
-         builtin =     AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
-       else
-         return NULL_TREE;
-
-       return aarch64_builtin_decls[builtin];
-      }
-    default:
-      return NULL_TREE;
-    }
-
-  return NULL_TREE;
-}
-
  /* Return builtin for reciprocal square root.  */
  
  tree
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index dabd047d7ba2c532238720d59ecd59f0f5ba822f..19c9d3cb17995e4ffb047a51831627ba95d20e72 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -986,7 +986,6 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
  rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
  tree aarch64_general_builtin_decl (unsigned, bool);
  tree aarch64_general_builtin_rsqrt (unsigned int);
-tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
  void handle_arm_acle_h (void);
  void handle_arm_neon_h (void);
  
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index d049f9a9819628a73bfd57114c3b89d848da7d9c..25f4cbb466d3b84c88c40b264e0742d7a65b75f7 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27584,10 +27584,6 @@ aarch64_libgcc_floating_mode_supported_p
  #undef TARGET_VECTORIZE_BUILTINS
  #define TARGET_VECTORIZE_BUILTINS
  
-#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
-#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
-  aarch64_builtin_vectorized_function
-
  #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
  #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
    aarch64_autovectorize_vector_modes
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc

index 95cb1e2ce7008c5e643d71d5c561e6fb3ebf029b..3a3c7299eb40d1ecd2ea45fda6a3b9970e566810 100644 (file)
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24004,6 +24004,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
      case ldexp_optab:
      case scalb_optab:
      case round_optab:
+    case lround_optab:
        return opt_type == OPTIMIZE_FOR_SPEED;
  
      case rint_optab:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

index 3b02d0cd567b0e966ce3e06435d893c83849d8dc..bf29f444382248f1c3c4207a8e1aac21a052afd2 100644 (file)
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -19926,9 +19926,6 @@
         && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
         && !flag_trapping_math && !flag_rounding_math)"
  {
-  if (optimize_insn_for_size_p ())
-    FAIL;
-
    if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
        && <SWI248x:MODE>mode != HImode
        && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc

index d666ccccf670b5c639c5ae21659aae4e3f789f6a..28973d957fbf7a3265d8984d35b89488ae7e0607 100644 (file)
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -120,6 +120,7 @@ init_internal_fns ()
  #define len_store_direct { 3, 3, false }
  #define vec_set_direct { 3, 3, false }
  #define unary_direct { 0, 0, true }
+#define unary_convert_direct { -1, 0, true }
  #define binary_direct { 0, 0, true }
  #define ternary_direct { 0, 0, true }
  #define cond_unary_direct { 1, 1, true }
@@ -3679,6 +3680,19 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
      emit_move_insn (lhs_rtx, ops[0].value);
  }
  
+/* Expand a call to a convert-like optab using the operands in STMT.
+   FN has a single output operand and NARGS input operands.  */
+
+static void
+expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
+                        unsigned int nargs)
+{
+  tree_pair types = direct_internal_fn_types (fn, stmt);
+  insn_code icode = convert_optab_handler (optab, TYPE_MODE (types.first),
+                                         TYPE_MODE (types.second));
+  expand_fn_using_insn (stmt, icode, 1, nargs);
+}
+
  /* Expanders for optabs that can use expand_direct_optab_fn.  */
  
  #define expand_unary_optab_fn(FN, STMT, OPTAB) \
@@ -3711,6 +3725,11 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
  #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
    expand_direct_optab_fn (FN, STMT, OPTAB, 4)
  
+/* Expanders for optabs that can use expand_convert_optab_fn.  */
+
+#define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \
+  expand_convert_optab_fn (FN, STMT, OPTAB, 1)
+
  /* RETURN_TYPE and ARGS are a return type and argument list that are
     in principle compatible with FN (which satisfies direct_internal_fn_p).
     Return the types that should be used to determine whether the
@@ -3783,6 +3802,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
  }
  
  #define direct_unary_optab_supported_p direct_optab_supported_p
+#define direct_unary_convert_optab_supported_p convert_optab_supported_p
  #define direct_binary_optab_supported_p direct_optab_supported_p
  #define direct_ternary_optab_supported_p direct_optab_supported_p
  #define direct_cond_unary_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def

index d2d550d358606022b1cb44fa842f06e0be507bc3..7c398baadc86501052c5fb13a114326eb9c3b9d2 100644 (file)
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -61,6 +61,9 @@ along with GCC; see the file COPYING3.  If not see
     - binary: a normal binary optab, such as vec_interleave_lo_<mode>
     - ternary: a normal ternary optab, such as fma<mode>4
  
+   - unary_convert: a single-input conversion optab, such as
+     lround<srcmode><dstmode>2.
+
     - cond_binary: a conditional binary optab, such as cond_add<mode>
     - cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode>
  
@@ -267,6 +270,26 @@ DEF_INTERNAL_FLT_FLOATN_FN (SQRT, ECF_CONST, sqrt, unary)
  DEF_INTERNAL_FLT_FN (TAN, ECF_CONST, tan, unary)
  DEF_INTERNAL_FLT_FN (TANH, ECF_CONST, tanh, unary)
  
+/* Floating-point to integer conversions.
+
+   ??? Here we preserve the I/L/LL prefix convention from the
+   corresponding built-in functions, rather than make the internal
+   functions polymorphic in both the argument and the return types.
+   Perhaps an alternative would be to pass a zero of the required
+   return type as a second parameter.  */
+DEF_INTERNAL_FLT_FN (ICEIL, ECF_CONST, lceil, unary_convert)
+DEF_INTERNAL_FLT_FN (IFLOOR, ECF_CONST, lfloor, unary_convert)
+DEF_INTERNAL_FLT_FN (IRINT, ECF_CONST, lrint, unary_convert)
+DEF_INTERNAL_FLT_FN (IROUND, ECF_CONST, lround, unary_convert)
+DEF_INTERNAL_FLT_FN (LCEIL, ECF_CONST, lceil, unary_convert)
+DEF_INTERNAL_FLT_FN (LFLOOR, ECF_CONST, lfloor, unary_convert)
+DEF_INTERNAL_FLT_FN (LRINT, ECF_CONST, lrint, unary_convert)
+DEF_INTERNAL_FLT_FN (LROUND, ECF_CONST, lround, unary_convert)
+DEF_INTERNAL_FLT_FN (LLCEIL, ECF_CONST, lceil, unary_convert)
+DEF_INTERNAL_FLT_FN (LLFLOOR, ECF_CONST, lfloor, unary_convert)
+DEF_INTERNAL_FLT_FN (LLRINT, ECF_CONST, lrint, unary_convert)
+DEF_INTERNAL_FLT_FN (LLROUND, ECF_CONST, lround, unary_convert)
+
  /* FP rounding.  */
  DEF_INTERNAL_FLT_FLOATN_FN (CEIL, ECF_CONST, ceil, unary)
  DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary)
diff --git a/gcc/optabs.cc b/gcc/optabs.cc

index a50dd798f2a454ac54e247f3e6cbab17577ea304..165f8d1fa22432b96967c69a58dbb7b4bf18120d 100644 (file)
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5828,7 +5828,8 @@ expand_sfix_optab (rtx to, rtx from, convert_optab tab)
    FOR_EACH_MODE_FROM (fmode, GET_MODE (from))
      FOR_EACH_MODE_FROM (imode, GET_MODE (to))
        {
-       icode = convert_optab_handler (tab, imode, fmode);
+       icode = convert_optab_handler (tab, imode, fmode,
+                                      insn_optimization_type ());
         if (icode != CODE_FOR_nothing)
           {
             rtx_insn *last = get_last_insn ();
diff --git a/gcc/predict.cc b/gcc/predict.cc

index b36caa3ae82b38211b0f134275fa7d7cc098f09b..1bc7ab944540478d9b1b1fb58b88eecd8a51bac0 100644 (file)
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -362,6 +362,17 @@ optimize_insn_for_speed_p (void)
    return !optimize_insn_for_size_p ();
  }
  
+/* Return the optimization type that should be used for the current
+   instruction.  */
+
+optimization_type
+insn_optimization_type ()
+{
+  return (optimize_insn_for_speed_p ()
+         ? OPTIMIZE_FOR_SPEED
+         : OPTIMIZE_FOR_SIZE);
+}
+
  /* Return TRUE if LOOP should be optimized for size.  */
  
  optimize_size_level
diff --git a/gcc/predict.h b/gcc/predict.h

index 864997498eca299ffdddd2a886f745570cfffc55..2548437376928b2f50ccae6ea0020ac9465e3500 100644 (file)
--- a/gcc/predict.h
+++ b/gcc/predict.h
@@ -68,6 +68,7 @@ extern enum optimize_size_level optimize_edge_for_size_p (edge);
  extern bool optimize_edge_for_speed_p (edge);
  extern enum optimize_size_level optimize_insn_for_size_p (void);
  extern bool optimize_insn_for_speed_p (void);
+extern optimization_type insn_optimization_type ();
  extern enum optimize_size_level optimize_loop_for_size_p (class loop *);
  extern bool optimize_loop_for_speed_p (class loop *);
  extern bool optimize_loop_nest_for_speed_p (class loop *);
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c

index 8516808becffad60165473dc43e2a76860ff6a9b..94d9af1a55d91cf865d7988e054d10abaf67b017 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O3 --save-temps" } */
+/* { dg-options "-O3 -fno-math-errno --save-temps" } */
  /* { dg-final { check-function-bodies "**" "" "" } } */
  
  #include <stdint.h>
@@ -184,3 +184,66 @@ TEST2 (int, ctz, int)
  **     ret
  */
  TEST4 (int, ctz, int)
+
+/*
+** test2_int_iroundf_float:
+**     fcvtas  v0.2s, v1.2s
+**     ret
+*/
+TEST2 (int, iroundf, float)
+
+/*
+** test2_int64_t_llround_double:
+**     fcvtas  v0.2d, v1.2d
+**     ret
+*/
+TEST2 (int64_t, llround, double)
+
+/*
+** test4_int_iroundf_float:
+**     fcvtas  v0.4s, v1.4s
+**     ret
+*/
+TEST4 (int, iroundf, float)
+
+/*
+** test2_int_ifloorf_float:
+**     fcvtms  v0.2s, v1.2s
+**     ret
+*/
+TEST2 (int, ifloorf, float)
+
+/*
+** test2_int64_t_llfloor_double:
+**     fcvtms  v0.2d, v1.2d
+**     ret
+*/
+TEST2 (int64_t, llfloor, double)
+
+/*
+** test4_int_ifloorf_float:
+**     fcvtms  v0.4s, v1.4s
+**     ret
+*/
+TEST4 (int, ifloorf, float)
+
+/*
+** test2_int_iceilf_float:
+**     fcvtps  v0.2s, v1.2s
+**     ret
+*/
+TEST2 (int, iceilf, float)
+
+/*
+** test2_int64_t_llceil_double:
+**     fcvtps  v0.2d, v1.2d
+**     ret
+*/
+TEST2 (int64_t, llceil, double)
+
+/*
+** test4_int_iceilf_float:
+**     fcvtps  v0.4s, v1.4s
+**     ret
+*/
+TEST4 (int, iceilf, float)
diff --git a/gcc/testsuite/gfortran.dg/vect/pr106253.f b/gcc/testsuite/gfortran.dg/vect/pr106253.f

new file mode 100644 (file)

index 0000000..1b6b7e8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr106253.f
@@ -0,0 +1,35 @@
+! { dg-do compile }
+
+      SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,           &
+     &                   BETA, Y, INCY )
+      LOGICAL            LSAME
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.                            &
+     &         .NOT.LSAME( TRANS, 'C' )      )THEN
+      END IF
+      END
+      subroutine evlrnf (ptrs0t, nclsm, prnf0t) 
+      real, dimension (1:nclsm,1:nclsm), intent (in) :: ptrs0t
+      real, dimension (1:nclsm,1:nclsm), intent (out):: prnf0t
+      real, allocatable, dimension (:,:) :: utrsft ! probas up
+      real, allocatable, dimension (:,:) :: dtrsft ! probas down
+      real, allocatable, dimension (:,:) :: xwrkt ! matrice
+      do icls = 1, nclsm
+         do ival = ipic - 1, 1, -1
+            xwrkt = trs2a2 (ival, ipic, utrsft, dtrsft, ncls)
+         enddo
+      enddo
+      contains
+      function trs2a2 (j, k, u, d, m)
+      real, dimension (1:m,1:m) :: trs2a2  ! resultat
+      real, dimension (1:m,1:m) :: u, d    ! matrices utrsft, dtrsft
+      end function trs2a2
+      end
+      program rnflow
+      integer, parameter :: ncls  =     256 ! nombre de classes
+      integer, dimension (1:ncls,1:ncls) :: mrnftt ! matrice theorique
+      real, dimension (1:ncls,1:ncls)    :: ptrst  ! matrice Markov
+      real, dimension (1:ncls,1:ncls)    :: prnft  ! matrice Rainflow
+      call evlrnf (ptrst, ncls, prnft)
+      mrnftt = nint (real (nsim) * real (npic) * prnft)
+      call cmpmat (mrnftt, mrnfst)
+      end program rnflow
author	Richard Sandiford <richard.sandiford@arm.com>
	Tue, 12 Jul 2022 13:09:44 +0000 (14:09 +0100)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Tue, 12 Jul 2022 13:09:44 +0000 (14:09 +0100)
gcc/config/aarch64/aarch64-builtins.cc		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/config/i386/i386.cc		patch \| blob \| blame \| history
gcc/config/i386/i386.md		patch \| blob \| blame \| history
gcc/internal-fn.cc		patch \| blob \| blame \| history
gcc/internal-fn.def		patch \| blob \| blame \| history
gcc/optabs.cc		patch \| blob \| blame \| history
gcc/predict.cc		patch \| blob \| blame \| history
gcc/predict.h		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/vect_unary_1.c		patch \| blob \| blame \| history
gcc/testsuite/gfortran.dg/vect/pr106253.f	[new file with mode: 0644]	patch \| blob