git.ipfire.org Git - thirdparty/gcc.git/commitdiff
amdgcn: Add support for additional natively supported floating-point operations
author: Kwok Cheung Yeung <kcy@codesourcery.com>
Fri, 9 Sep 2022 13:36:42 +0000 (15:36 +0200)
committer: Tobias Burnus <tobias@codesourcery.com>
Fri, 9 Sep 2022 13:36:42 +0000 (15:36 +0200)
This adds support for the following natively supported floating-point
operations, in scalar and vectorized modes:

floor, ceil, exp2*, log2*, sin*, cos*, ldexp, frexp

* These operations are single-precision float only and are only active
if unsafe_math_optimizations are enabled (due to potential numerical
precision issues).

2022-09-09  Kwok Cheung Yeung  <kcy@codesourcery.com>

gcc/
* config/gcn/gcn-builtins.def (FABSVF, LDEXPVF, LDEXPV, FREXPVF_EXP,
FREXPVF_MANT, FREXPV_EXP, FREXPV_MANT): Add new builtins.
* config/gcn/gcn-protos.h (gcn_dconst1over2pi): New prototype.
* config/gcn/gcn-valu.md (MATH_UNOP_1OR2REG, MATH_UNOP_1REG,
MATH_UNOP_TRIG): New iterators.
(math_unop): New attributes.
(<math_unop><mode>2, <math_unop><mode>2<exec>,
<math_unop><mode>2, <math_unop><mode>2<exec>,
*<math_unop><mode>2_insn, *<math_unop><mode>2<exec>_insn,
ldexp<mode>3, ldexp<mode>3<exec>,
frexp<mode>_exp2, frexp<mode>_mant2,
frexp<mode>_exp2<exec>, frexp<mode>_mant2<exec>): New instructions.
(<math_unop><mode>2, <math_unop><mode>2<exec>): New expanders.
* config/gcn/gcn.cc (init_ext_gcn_constants): Update definition of
dconst1over2pi.
(gcn_dconst1over2pi): New.
(gcn_builtin_type_index): Add entry for v64df type.
(v64df_type_node): New.
(gcn_init_builtin_types): Initialize v64df_type_node.
(gcn_expand_builtin_1): Expand new builtins to instructions.
(print_operand): Fix assembler output for 1/(2*PI) constant.
* config/gcn/gcn.md (unspec): Add new entries.

(cherry picked from commit eff73c104a3db882f3bc7f567f322e40470c7571)

gcc/ChangeLog.omp
gcc/config/gcn/gcn-builtins.def
gcc/config/gcn/gcn-protos.h
gcc/config/gcn/gcn-valu.md
gcc/config/gcn/gcn.cc
gcc/config/gcn/gcn.md

index ae066d893ccf5d19acba25ffab3f9ce031f0069c..0ad8f78103c5b95bb2cf470cbc1a6b278b0b0113 100644 (file)
@@ -1,3 +1,31 @@
+2022-09-09  Tobias Burnus  <tobias@codesourcery.com>
+
+       Backport from mainline:
+       2022-09-09  Kwok Cheung Yeung  <kcy@codesourcery.com>
+
+       * config/gcn/gcn-builtins.def (FABSVF, LDEXPVF, LDEXPV, FREXPVF_EXP,
+       FREXPVF_MANT, FREXPV_EXP, FREXPV_MANT): Add new builtins.
+       * config/gcn/gcn-protos.h (gcn_dconst1over2pi): New prototype.
+       * config/gcn/gcn-valu.md (MATH_UNOP_1OR2REG, MATH_UNOP_1REG,
+       MATH_UNOP_TRIG): New iterators.
+       (math_unop): New attributes.
+       (<math_unop><mode>2, <math_unop><mode>2<exec>,
+       <math_unop><mode>2, <math_unop><mode>2<exec>,
+       *<math_unop><mode>2_insn, *<math_unop><mode>2<exec>_insn,
+       ldexp<mode>3, ldexp<mode>3<exec>,
+       frexp<mode>_exp2, frexp<mode>_mant2,
+       frexp<mode>_exp2<exec>, frexp<mode>_mant2<exec>): New instructions.
+       (<math_unop><mode>2, <math_unop><mode>2<exec>): New expanders.
+       * config/gcn/gcn.cc (init_ext_gcn_constants): Update definition of
+       dconst1over2pi.
+       (gcn_dconst1over2pi): New.
+       (gcn_builtin_type_index): Add entry for v64df type.
+       (v64df_type_node): New.
+       (gcn_init_builtin_types): Initialize v64df_type_node.
+       (gcn_expand_builtin_1): Expand new builtins to instructions.
+       (print_operand): Fix assembler output for 1/(2*PI) constant.
+       * config/gcn/gcn.md (unspec): Add new entries.
+
 2022-09-08  Tobias Burnus  <tobias@codesourcery.com>
 
        Backport from mainline:
index 54e4ea4e953dc656bfb873272116b00481c2f6de..27691909925be65364217ef3aac24d7818e132af 100644 (file)
@@ -59,6 +59,41 @@ DEF_BUILTIN (SQRTF, 3 /*CODE_FOR_sqrtf */,
             _A2 (GCN_BTI_SF, GCN_BTI_SF),
             gcn_expand_builtin_1)
 
+DEF_BUILTIN (FABSVF, 3 /*CODE_FOR_fabsvf */,
+            "fabsvf", B_INSN,
+            _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
+            gcn_expand_builtin_1)
+
+DEF_BUILTIN (LDEXPVF, 3 /*CODE_FOR_ldexpvf */,
+            "ldexpvf", B_INSN,
+            _A3 (GCN_BTI_V64SF, GCN_BTI_V64SF, GCN_BTI_V64SI),
+            gcn_expand_builtin_1)
+
+DEF_BUILTIN (LDEXPV, 3 /*CODE_FOR_ldexpv */,
+            "ldexpv", B_INSN,
+            _A3 (GCN_BTI_V64DF, GCN_BTI_V64DF, GCN_BTI_V64SI),
+            gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPVF_EXP, 3 /*CODE_FOR_frexpvf_exp */,
+            "frexpvf_exp", B_INSN,
+            _A2 (GCN_BTI_V64SI, GCN_BTI_V64SF),
+            gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPVF_MANT, 3 /*CODE_FOR_frexpvf_mant */,
+            "frexpvf_mant", B_INSN,
+            _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
+            gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPV_EXP, 3 /*CODE_FOR_frexpv_exp */,
+            "frexpv_exp", B_INSN,
+            _A2 (GCN_BTI_V64SI, GCN_BTI_V64DF),
+            gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPV_MANT, 3 /*CODE_FOR_frexpv_mant */,
+            "frexpv_mant", B_INSN,
+            _A2 (GCN_BTI_V64DF, GCN_BTI_V64DF),
+            gcn_expand_builtin_1)
+
 DEF_BUILTIN (CMP_SWAP, -1,
            "cmp_swap", B_INSN,
            _A4 (GCN_BTI_UINT, GCN_BTI_VOIDPTR, GCN_BTI_UINT, GCN_BTI_UINT),
index 38197b929fd7dfc2d4180e4cb01d776ea1977343..ca804609c09e16464e96e9a9fb276ca2439dd856 100644 (file)
@@ -54,6 +54,7 @@ extern int gcn_hard_regno_nregs (int regno, machine_mode mode);
 extern void gcn_hsa_declare_function_name (FILE *file, const char *name,
                                           tree decl);
 extern HOST_WIDE_INT gcn_initial_elimination_offset (int, int);
+extern REAL_VALUE_TYPE gcn_dconst1over2pi (void);
 extern bool gcn_inline_constant64_p (rtx, bool);
 extern bool gcn_inline_constant_p (rtx);
 extern int gcn_inline_fp_constant_p (rtx, bool);
index a3099f7db178e5143f1d68de153bc73f225f90b5..5c66f4f680ba88687684f24adaa1fa51a6563e27 100644 (file)
   [(set_attr "type" "vop1")
    (set_attr "length" "8")])
 
+; These FP unops have f64, f32 and f16 versions.
+(define_int_iterator MATH_UNOP_1OR2REG
+  [UNSPEC_FLOOR UNSPEC_CEIL])
+
+; These FP unops only have f16/f32 versions.
+(define_int_iterator MATH_UNOP_1REG
+  [UNSPEC_EXP2 UNSPEC_LOG2])
+
+(define_int_iterator MATH_UNOP_TRIG
+  [UNSPEC_SIN UNSPEC_COS])
+
+(define_int_attr math_unop
+  [(UNSPEC_FLOOR "floor")
+   (UNSPEC_CEIL "ceil")
+   (UNSPEC_EXP2 "exp2")
+   (UNSPEC_LOG2 "log2")
+   (UNSPEC_SIN "sin")
+   (UNSPEC_COS "cos")])
+
+(define_insn "<math_unop><mode>2"
+  [(set (match_operand:FP 0 "register_operand"  "=  v")
+       (unspec:FP
+         [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
+         MATH_UNOP_1OR2REG))]
+  ""
+  "v_<math_unop>%i0\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+(define_insn "<math_unop><mode>2<exec>"
+  [(set (match_operand:V_FP 0 "register_operand"  "=  v")
+       (unspec:V_FP
+         [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
+         MATH_UNOP_1OR2REG))]
+  ""
+  "v_<math_unop>%i0\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+(define_insn "<math_unop><mode>2"
+  [(set (match_operand:FP_1REG 0 "register_operand"  "=  v")
+       (unspec:FP_1REG
+         [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
+         MATH_UNOP_1REG))]
+  "flag_unsafe_math_optimizations"
+  "v_<math_unop>%i0\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+(define_insn "<math_unop><mode>2<exec>"
+  [(set (match_operand:V_FP_1REG 0 "register_operand"  "=  v")
+       (unspec:V_FP_1REG
+         [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
+         MATH_UNOP_1REG))]
+  "flag_unsafe_math_optimizations"
+  "v_<math_unop>%i0\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+(define_insn "*<math_unop><mode>2_insn"
+  [(set (match_operand:FP_1REG 0 "register_operand"  "=  v")
+       (unspec:FP_1REG
+         [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
+         MATH_UNOP_TRIG))]
+  "flag_unsafe_math_optimizations"
+  "v_<math_unop>%i0\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+(define_insn "*<math_unop><mode>2<exec>_insn"
+  [(set (match_operand:V_FP_1REG 0 "register_operand"  "=  v")
+       (unspec:V_FP_1REG
+         [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
+         MATH_UNOP_TRIG))]
+  "flag_unsafe_math_optimizations"
+  "v_<math_unop>%i0\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+; Trigonometric functions need their input scaled by 1/(2*PI) first.
+
+(define_expand "<math_unop><mode>2"
+  [(set (match_dup 2)
+       (mult:FP_1REG
+         (match_dup 3)
+         (match_operand:FP_1REG 1 "gcn_alu_operand")))
+   (set (match_operand:FP_1REG 0 "register_operand")
+       (unspec:FP_1REG
+         [(match_dup 2)]
+         MATH_UNOP_TRIG))]
+  "flag_unsafe_math_optimizations"
+  {
+    operands[2] = gen_reg_rtx (<MODE>mode);
+    operands[3] = const_double_from_real_value (gcn_dconst1over2pi (),
+                                               <MODE>mode);
+  })
+
+(define_expand "<math_unop><mode>2<exec>"
+  [(set (match_dup 2)
+       (mult:V_FP_1REG
+         (match_dup 3)
+         (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
+   (set (match_operand:V_FP_1REG 0 "register_operand")
+       (unspec:V_FP_1REG
+         [(match_dup 2)]
+         MATH_UNOP_TRIG))]
+  "flag_unsafe_math_optimizations"
+  {
+    operands[2] = gen_reg_rtx (<MODE>mode);
+    operands[3] =
+       gcn_vec_constant (<MODE>mode,
+                         const_double_from_real_value (gcn_dconst1over2pi (),
+                                                       <SCALAR_MODE>mode));
+  })
+
+; Implement ldexp pattern
+
+(define_insn "ldexp<mode>3"
+  [(set (match_operand:FP 0 "register_operand"  "=v")
+       (unspec:FP
+         [(match_operand:FP 1 "gcn_alu_operand" "vB")
+          (match_operand:SI 2 "gcn_alu_operand" "vSvA")]
+         UNSPEC_LDEXP))]
+  ""
+  "v_ldexp%i0\t%0, %1, %2"
+  [(set_attr "type" "vop3a")
+   (set_attr "length" "8")])
+
+(define_insn "ldexp<mode>3<exec>"
+  [(set (match_operand:V_FP 0 "register_operand"  "=v")
+       (unspec:V_FP
+         [(match_operand:V_FP 1 "gcn_alu_operand" "vB")
+          (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")]
+         UNSPEC_LDEXP))]
+  ""
+  "v_ldexp%i0\t%0, %1, %2"
+  [(set_attr "type" "vop3a")
+   (set_attr "length" "8")])
+
+; Implement frexp patterns
+
+(define_insn "frexp<mode>_exp2"
+  [(set (match_operand:SI 0 "register_operand" "=v")
+       (unspec:SI
+         [(match_operand:FP 1 "gcn_alu_operand" "vB")]
+         UNSPEC_FREXP_EXP))]
+  ""
+  "v_frexp_exp_i32%i1\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+(define_insn "frexp<mode>_mant2"
+  [(set (match_operand:FP 0 "register_operand" "=v")
+       (unspec:FP
+         [(match_operand:FP 1 "gcn_alu_operand" "vB")]
+         UNSPEC_FREXP_MANT))]
+  ""
+  "v_frexp_mant%i1\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+(define_insn "frexp<mode>_exp2<exec>"
+  [(set (match_operand:V64SI 0 "register_operand" "=v")
+       (unspec:V64SI
+         [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
+         UNSPEC_FREXP_EXP))]
+  ""
+  "v_frexp_exp_i32%i1\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+(define_insn "frexp<mode>_mant2<exec>"
+  [(set (match_operand:V_FP 0 "register_operand" "=v")
+       (unspec:V_FP
+         [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
+         UNSPEC_FREXP_MANT))]
+  ""
+  "v_frexp_mant%i1\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
 ;; }}}
 ;; {{{ FP fused multiply and add
 
index 557e051603c4f0bcf983d358c348dab9a01294be..ef6bca53d9dca7deb8d727ff5d8e871b35689bd8 100644 (file)
@@ -789,12 +789,20 @@ init_ext_gcn_constants (void)
   /* FIXME: this constant probably does not match what hardware really loads.
      Reality check it eventually.  */
   real_from_string (&dconst1over2pi,
-                   "0.1591549430918953357663423455968866839");
+                   "0.15915494309189532");
   real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);
 
   ext_gcn_constants_init = 1;
 }
 
+REAL_VALUE_TYPE
+gcn_dconst1over2pi (void)
+{
+  if (!ext_gcn_constants_init)
+    init_ext_gcn_constants ();
+  return dconst1over2pi;
+}
+
 /* Return non-zero if X is a constant that can appear as an inline operand.
    This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi)
    Or a vector of those.
@@ -3636,6 +3644,7 @@ enum gcn_builtin_type_index
   GCN_BTI_SF,
   GCN_BTI_V64SI,
   GCN_BTI_V64SF,
+  GCN_BTI_V64DF,
   GCN_BTI_V64PTR,
   GCN_BTI_SIPTR,
   GCN_BTI_SFPTR,
@@ -3652,6 +3661,7 @@ static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];
 #define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
 #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
 #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
+#define v64df_type_node (gcn_builtin_types[GCN_BTI_V64DF])
 #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
 #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
 #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
@@ -3741,6 +3751,7 @@ gcn_init_builtin_types (void)
   sf_type_node = float32_type_node;
   v64si_type_node = build_vector_type (intSI_type_node, 64);
   v64sf_type_node = build_vector_type (float_type_node, 64);
+  v64df_type_node = build_vector_type (double_type_node, 64);
   v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
                                        /*build_pointer_type
                                          (integer_type_node) */
@@ -4008,6 +4019,105 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
        emit_insn (gen_sqrtsf2 (target, arg));
        return target;
       }
+    case GCN_BUILTIN_FABSVF:
+      {
+       if (ignore)
+         return target;
+       rtx exec = gcn_full_exec_reg ();
+       rtx arg = force_reg (V64SFmode,
+                            expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+                                         V64SFmode,
+                                         EXPAND_NORMAL));
+       emit_insn (gen_absv64sf2_exec
+                  (target, arg, gcn_gen_undef (V64SFmode), exec));
+       return target;
+      }
+    case GCN_BUILTIN_LDEXPVF:
+      {
+       if (ignore)
+         return target;
+       rtx exec = gcn_full_exec_reg ();
+       rtx arg1 = force_reg (V64SFmode,
+                             expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+                                          V64SFmode,
+                                          EXPAND_NORMAL));
+       rtx arg2 = force_reg (V64SImode,
+                             expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
+                                          V64SImode,
+                                          EXPAND_NORMAL));
+       emit_insn (gen_ldexpv64sf3_exec
+                  (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec));
+       return target;
+      }
+    case GCN_BUILTIN_LDEXPV:
+      {
+       if (ignore)
+         return target;
+       rtx exec = gcn_full_exec_reg ();
+       rtx arg1 = force_reg (V64DFmode,
+                             expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+                                          V64SFmode,
+                                          EXPAND_NORMAL));
+       rtx arg2 = force_reg (V64SImode,
+                             expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
+                                          V64SImode,
+                                          EXPAND_NORMAL));
+       emit_insn (gen_ldexpv64df3_exec
+                  (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec));
+       return target;
+      }
+    case GCN_BUILTIN_FREXPVF_EXP:
+      {
+       if (ignore)
+         return target;
+       rtx exec = gcn_full_exec_reg ();
+       rtx arg = force_reg (V64SFmode,
+                            expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+                                         V64SFmode,
+                                         EXPAND_NORMAL));
+       emit_insn (gen_frexpv64sf_exp2_exec
+                  (target, arg, gcn_gen_undef (V64SImode), exec));
+       return target;
+      }
+    case GCN_BUILTIN_FREXPVF_MANT:
+      {
+       if (ignore)
+         return target;
+       rtx exec = gcn_full_exec_reg ();
+       rtx arg = force_reg (V64SFmode,
+                            expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+                                         V64SFmode,
+                                         EXPAND_NORMAL));
+       emit_insn (gen_frexpv64sf_mant2_exec
+                  (target, arg, gcn_gen_undef (V64SFmode), exec));
+       return target;
+      }
+    case GCN_BUILTIN_FREXPV_EXP:
+      {
+       if (ignore)
+         return target;
+       rtx exec = gcn_full_exec_reg ();
+       rtx arg = force_reg (V64DFmode,
+                            expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+                                         V64DFmode,
+                                         EXPAND_NORMAL));
+       emit_insn (gen_frexpv64df_exp2_exec
+                  (target, arg, gcn_gen_undef (V64SImode), exec));
+       return target;
+      }
+    case GCN_BUILTIN_FREXPV_MANT:
+      {
+       if (ignore)
+         return target;
+       rtx exec = gcn_full_exec_reg ();
+       rtx arg = force_reg (V64DFmode,
+                            expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+                                         V64DFmode,
+                                         EXPAND_NORMAL));
+       emit_insn (gen_frexpv64df_mant2_exec
+                  (target, arg, gcn_gen_undef (V64DFmode), exec));
+       return target;
+      }
     case GCN_BUILTIN_OMP_DIM_SIZE:
       {
        if (ignore)
@@ -6507,7 +6617,7 @@ print_operand (FILE *file, rtx x, int code)
              str = "-4.0";
              break;
            case 248:
-             str = "1/pi";
+             str = "0.15915494";
              break;
            default:
              rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
index 70d88adc59a86f8d035e0bfb49605c54308cf47d..d13e7d2678d087db07610f3e5d25f8343055eb39 100644 (file)
@@ -82,7 +82,9 @@
   UNSPEC_GATHER
   UNSPEC_SCATTER
   UNSPEC_RCP
-  UNSPEC_FLBIT_INT])
+  UNSPEC_FLBIT_INT
+  UNSPEC_FLOOR UNSPEC_CEIL UNSPEC_SIN UNSPEC_COS UNSPEC_EXP2 UNSPEC_LOG2
+  UNSPEC_LDEXP UNSPEC_FREXP_EXP UNSPEC_FREXP_MANT])
 
 ;; }}}
 ;; {{{ Attributes