_A2 (GCN_BTI_SF, GCN_BTI_SF),
gcn_expand_builtin_1)
+/* 64-lane vector math builtins, all expanded by gcn_expand_builtin_1:
+     fabsvf       v64sf -> v64sf
+     ldexpvf      v64sf x v64si -> v64sf
+     ldexpv       v64df x v64si -> v64df
+     frexpvf_exp  v64sf -> v64si   (exponent part)
+     frexpvf_mant v64sf -> v64sf   (mantissa part)
+     frexpv_exp   v64df -> v64si
+     frexpv_mant  v64df -> v64df
+   NOTE(review): the literal "3" icode fields look like placeholders for
+   the CODE_FOR_* values named in the adjacent comments -- confirm how
+   DEF_BUILTIN consumes this field.  */
+DEF_BUILTIN (FABSVF, 3 /*CODE_FOR_fabsvf */,
+ "fabsvf", B_INSN,
+ _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (LDEXPVF, 3 /*CODE_FOR_ldexpvf */,
+ "ldexpvf", B_INSN,
+ _A3 (GCN_BTI_V64SF, GCN_BTI_V64SF, GCN_BTI_V64SI),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (LDEXPV, 3 /*CODE_FOR_ldexpv */,
+ "ldexpv", B_INSN,
+ _A3 (GCN_BTI_V64DF, GCN_BTI_V64DF, GCN_BTI_V64SI),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPVF_EXP, 3 /*CODE_FOR_frexpvf_exp */,
+ "frexpvf_exp", B_INSN,
+ _A2 (GCN_BTI_V64SI, GCN_BTI_V64SF),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPVF_MANT, 3 /*CODE_FOR_frexpvf_mant */,
+ "frexpvf_mant", B_INSN,
+ _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPV_EXP, 3 /*CODE_FOR_frexpv_exp */,
+ "frexpv_exp", B_INSN,
+ _A2 (GCN_BTI_V64SI, GCN_BTI_V64DF),
+ gcn_expand_builtin_1)
+
+DEF_BUILTIN (FREXPV_MANT, 3 /*CODE_FOR_frexpv_mant */,
+ "frexpv_mant", B_INSN,
+ _A2 (GCN_BTI_V64DF, GCN_BTI_V64DF),
+ gcn_expand_builtin_1)
+
DEF_BUILTIN (CMP_SWAP, -1,
"cmp_swap", B_INSN,
_A4 (GCN_BTI_UINT, GCN_BTI_VOIDPTR, GCN_BTI_UINT, GCN_BTI_UINT),
extern void gcn_hsa_declare_function_name (FILE *file, const char *name,
tree decl);
extern HOST_WIDE_INT gcn_initial_elimination_offset (int, int);
+extern REAL_VALUE_TYPE gcn_dconst1over2pi (void);
extern bool gcn_inline_constant64_p (rtx, bool);
extern bool gcn_inline_constant_p (rtx);
extern int gcn_inline_fp_constant_p (rtx, bool);
[(set_attr "type" "vop1")
(set_attr "length" "8")])
+; These FP unops have f64, f32 and f16 versions.
+(define_int_iterator MATH_UNOP_1OR2REG
+ [UNSPEC_FLOOR UNSPEC_CEIL])
+
+; These FP unops only have f16/f32 versions.
+(define_int_iterator MATH_UNOP_1REG
+ [UNSPEC_EXP2 UNSPEC_LOG2])
+
+; Trig unops (f16/f32 only).  Their define_insn patterns are internal
+; ("*..._insn") because the user-visible expander must first scale the
+; input by 1/(2*PI); see the define_expand patterns below.
+(define_int_iterator MATH_UNOP_TRIG
+ [UNSPEC_SIN UNSPEC_COS])
+
+; Map each unspec to the mnemonic fragment used both in the generated
+; pattern names and in the v_<math_unop> assembler template.
+(define_int_attr math_unop
+ [(UNSPEC_FLOOR "floor")
+ (UNSPEC_CEIL "ceil")
+ (UNSPEC_EXP2 "exp2")
+ (UNSPEC_LOG2 "log2")
+ (UNSPEC_SIN "sin")
+ (UNSPEC_COS "cos")])
+
+; Scalar floor/ceil.  %i0 appends the instruction type suffix derived
+; from operand 0's mode.
+(define_insn "<math_unop><mode>2"
+ [(set (match_operand:FP 0 "register_operand" "= v")
+ (unspec:FP
+ [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
+ MATH_UNOP_1OR2REG))]
+ ""
+ "v_<math_unop>%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+; 64-lane vector floor/ceil; <exec> adds the exec-masked variant.
+(define_insn "<math_unop><mode>2<exec>"
+ [(set (match_operand:V_FP 0 "register_operand" "= v")
+ (unspec:V_FP
+ [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
+ MATH_UNOP_1OR2REG))]
+ ""
+ "v_<math_unop>%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+; Scalar exp2/log2 (f16/f32 only).  Gated on
+; flag_unsafe_math_optimizations -- presumably because the hardware
+; instructions are approximate; TODO confirm precision guarantees.
+(define_insn "<math_unop><mode>2"
+ [(set (match_operand:FP_1REG 0 "register_operand" "= v")
+ (unspec:FP_1REG
+ [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
+ MATH_UNOP_1REG))]
+ "flag_unsafe_math_optimizations"
+ "v_<math_unop>%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+; 64-lane vector exp2/log2, same gating; <exec> adds the masked variant.
+(define_insn "<math_unop><mode>2<exec>"
+ [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
+ (unspec:V_FP_1REG
+ [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
+ MATH_UNOP_1REG))]
+ "flag_unsafe_math_optimizations"
+ "v_<math_unop>%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+; Internal sin/cos patterns ("*" prefix: not directly nameable by
+; optabs).  They expect an input already scaled by 1/(2*PI); only the
+; define_expand patterns below should generate them.
+(define_insn "*<math_unop><mode>2_insn"
+ [(set (match_operand:FP_1REG 0 "register_operand" "= v")
+ (unspec:FP_1REG
+ [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
+ MATH_UNOP_TRIG))]
+ "flag_unsafe_math_optimizations"
+ "v_<math_unop>%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+; 64-lane vector form of the internal sin/cos pattern.
+(define_insn "*<math_unop><mode>2<exec>_insn"
+ [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
+ (unspec:V_FP_1REG
+ [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
+ MATH_UNOP_TRIG))]
+ "flag_unsafe_math_optimizations"
+ "v_<math_unop>%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+; Trigonometric functions need their input scaled by 1/(2*PI) first.
+; operand 2 is a scratch pseudo for the scaled input; operand 3 is the
+; 1/(2*PI) constant (from gcn_dconst1over2pi).
+
+(define_expand "<math_unop><mode>2"
+ [(set (match_dup 2)
+ (mult:FP_1REG
+ (match_dup 3)
+ (match_operand:FP_1REG 1 "gcn_alu_operand")))
+ (set (match_operand:FP_1REG 0 "register_operand")
+ (unspec:FP_1REG
+ [(match_dup 2)]
+ MATH_UNOP_TRIG))]
+ "flag_unsafe_math_optimizations"
+ {
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[3] = const_double_from_real_value (gcn_dconst1over2pi (),
+ <MODE>mode);
+ })
+
+; Vector form: the constant is broadcast to all 64 lanes.
+(define_expand "<math_unop><mode>2<exec>"
+ [(set (match_dup 2)
+ (mult:V_FP_1REG
+ (match_dup 3)
+ (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
+ (set (match_operand:V_FP_1REG 0 "register_operand")
+ (unspec:V_FP_1REG
+ [(match_dup 2)]
+ MATH_UNOP_TRIG))]
+ "flag_unsafe_math_optimizations"
+ {
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[3] =
+ gcn_vec_constant (<MODE>mode,
+ const_double_from_real_value (gcn_dconst1over2pi (),
+ <SCALAR_MODE>mode));
+ })
+
+; Implement ldexp pattern: operand 1 * 2^operand 2.  The scalar form
+; takes an SI exponent, the vector form a per-lane V64SI exponent.
+
+(define_insn "ldexp<mode>3"
+ [(set (match_operand:FP 0 "register_operand" "=v")
+ (unspec:FP
+ [(match_operand:FP 1 "gcn_alu_operand" "vB")
+ (match_operand:SI 2 "gcn_alu_operand" "vSvA")]
+ UNSPEC_LDEXP))]
+ ""
+ "v_ldexp%i0\t%0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "ldexp<mode>3<exec>"
+ [(set (match_operand:V_FP 0 "register_operand" "=v")
+ (unspec:V_FP
+ [(match_operand:V_FP 1 "gcn_alu_operand" "vB")
+ (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")]
+ UNSPEC_LDEXP))]
+ ""
+ "v_ldexp%i0\t%0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+; Implement frexp patterns.  The hardware splits frexp into two
+; instructions: v_frexp_exp_i32 yields the integer exponent,
+; v_frexp_mant the mantissa.  %i1 takes the type suffix from the FP
+; input operand, since operand 0 of the _exp form is integral.
+
+(define_insn "frexp<mode>_exp2"
+ [(set (match_operand:SI 0 "register_operand" "=v")
+ (unspec:SI
+ [(match_operand:FP 1 "gcn_alu_operand" "vB")]
+ UNSPEC_FREXP_EXP))]
+ ""
+ "v_frexp_exp_i32%i1\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+(define_insn "frexp<mode>_mant2"
+ [(set (match_operand:FP 0 "register_operand" "=v")
+ (unspec:FP
+ [(match_operand:FP 1 "gcn_alu_operand" "vB")]
+ UNSPEC_FREXP_MANT))]
+ ""
+ "v_frexp_mant%i1\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+; 64-lane vector variants; exponent comes back as V64SI.
+(define_insn "frexp<mode>_exp2<exec>"
+ [(set (match_operand:V64SI 0 "register_operand" "=v")
+ (unspec:V64SI
+ [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
+ UNSPEC_FREXP_EXP))]
+ ""
+ "v_frexp_exp_i32%i1\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+(define_insn "frexp<mode>_mant2<exec>"
+ [(set (match_operand:V_FP 0 "register_operand" "=v")
+ (unspec:V_FP
+ [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
+ UNSPEC_FREXP_MANT))]
+ ""
+ "v_frexp_mant%i1\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
;; }}}
;; {{{ FP fused multiply and add
/* FIXME: this constant probably does not match what hardware really loads.
Reality check it eventually. */
real_from_string (&dconst1over2pi,
- "0.1591549430918953357663423455968866839");
+ "0.15915494309189532");
real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);
ext_gcn_constants_init = 1;
}
+/* Return 1/(2*PI) as a REAL_VALUE_TYPE, for use by the sin/cos
+   expanders in gcn-valu.md.  Lazily initializes the cached external
+   GCN constants on first use.  */
+REAL_VALUE_TYPE
+gcn_dconst1over2pi (void)
+{
+ if (!ext_gcn_constants_init)
+ init_ext_gcn_constants ();
+ return dconst1over2pi;
+}
+
/* Return non-zero if X is a constant that can appear as an inline operand.
   This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, or 1/(2*pi)
Or a vector of those.
GCN_BTI_SF,
GCN_BTI_V64SI,
GCN_BTI_V64SF,
+ GCN_BTI_V64DF,
GCN_BTI_V64PTR,
GCN_BTI_SIPTR,
GCN_BTI_SFPTR,
#define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
#define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
#define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
+#define v64df_type_node (gcn_builtin_types[GCN_BTI_V64DF])
#define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
#define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
#define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
sf_type_node = float32_type_node;
v64si_type_node = build_vector_type (intSI_type_node, 64);
v64sf_type_node = build_vector_type (float_type_node, 64);
+ v64df_type_node = build_vector_type (double_type_node, 64);
v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
/*build_pointer_type
(integer_type_node) */
emit_insn (gen_sqrtsf2 (target, arg));
return target;
}
+ /* fabsvf (v64sf) -> v64sf: expand via absv64sf2 under a full exec
+    mask; skip code generation entirely when the result is unused.  */
+ case GCN_BUILTIN_FABSVF:
+ {
+ if (ignore)
+ return target;
+ rtx exec = gcn_full_exec_reg ();
+ rtx arg = force_reg (V64SFmode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64SFmode,
+ EXPAND_NORMAL));
+ emit_insn (gen_absv64sf2_exec
+ (target, arg, gcn_gen_undef (V64SFmode), exec));
+ return target;
+ }
+ /* ldexpvf (v64sf, v64si) -> v64sf via ldexpv64sf3.  */
+ case GCN_BUILTIN_LDEXPVF:
+ {
+ if (ignore)
+ return target;
+ rtx exec = gcn_full_exec_reg ();
+ rtx arg1 = force_reg (V64SFmode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64SFmode,
+ EXPAND_NORMAL));
+ rtx arg2 = force_reg (V64SImode,
+ expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
+ V64SImode,
+ EXPAND_NORMAL));
+ emit_insn (gen_ldexpv64sf3_exec
+ (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec));
+ return target;
+ }
+ /* ldexpv (v64df, v64si) -> v64df via ldexpv64df3.  Arg 0 is v64df
+    (see _A3 in gcn-builtins.def), so it must be expanded in V64DFmode
+    -- the original passed V64SFmode (copy-paste from the LDEXPVF
+    case), giving force_reg a mode-mismatched rtx.  */
+ case GCN_BUILTIN_LDEXPV:
+ {
+ if (ignore)
+ return target;
+ rtx exec = gcn_full_exec_reg ();
+ rtx arg1 = force_reg (V64DFmode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64DFmode,
+ EXPAND_NORMAL));
+ rtx arg2 = force_reg (V64SImode,
+ expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
+ V64SImode,
+ EXPAND_NORMAL));
+ emit_insn (gen_ldexpv64df3_exec
+ (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec));
+ return target;
+ }
+ /* frexpvf_exp (v64sf) -> v64si: exponent part via frexpv64sf_exp2.  */
+ case GCN_BUILTIN_FREXPVF_EXP:
+ {
+ if (ignore)
+ return target;
+ rtx exec = gcn_full_exec_reg ();
+ rtx arg = force_reg (V64SFmode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64SFmode,
+ EXPAND_NORMAL));
+ emit_insn (gen_frexpv64sf_exp2_exec
+ (target, arg, gcn_gen_undef (V64SImode), exec));
+ return target;
+ }
+ /* frexpvf_mant (v64sf) -> v64sf: mantissa part.  */
+ case GCN_BUILTIN_FREXPVF_MANT:
+ {
+ if (ignore)
+ return target;
+ rtx exec = gcn_full_exec_reg ();
+ rtx arg = force_reg (V64SFmode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64SFmode,
+ EXPAND_NORMAL));
+ emit_insn (gen_frexpv64sf_mant2_exec
+ (target, arg, gcn_gen_undef (V64SFmode), exec));
+ return target;
+ }
+ /* frexpv_exp (v64df) -> v64si: exponent part, double input.  */
+ case GCN_BUILTIN_FREXPV_EXP:
+ {
+ if (ignore)
+ return target;
+ rtx exec = gcn_full_exec_reg ();
+ rtx arg = force_reg (V64DFmode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64DFmode,
+ EXPAND_NORMAL));
+ emit_insn (gen_frexpv64df_exp2_exec
+ (target, arg, gcn_gen_undef (V64SImode), exec));
+ return target;
+ }
+ /* frexpv_mant (v64df) -> v64df: mantissa part, double input.  */
+ case GCN_BUILTIN_FREXPV_MANT:
+ {
+ if (ignore)
+ return target;
+ rtx exec = gcn_full_exec_reg ();
+ rtx arg = force_reg (V64DFmode,
+ expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
+ V64DFmode,
+ EXPAND_NORMAL));
+ emit_insn (gen_frexpv64df_mant2_exec
+ (target, arg, gcn_gen_undef (V64DFmode), exec));
+ return target;
+ }
case GCN_BUILTIN_OMP_DIM_SIZE:
{
if (ignore)
str = "-4.0";
break;
case 248:
- str = "1/pi";
+ str = "0.15915494";
break;
default:
rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
UNSPEC_GATHER
UNSPEC_SCATTER
UNSPEC_RCP
- UNSPEC_FLBIT_INT])
+ UNSPEC_FLBIT_INT
+ UNSPEC_FLOOR UNSPEC_CEIL UNSPEC_SIN UNSPEC_COS UNSPEC_EXP2 UNSPEC_LOG2
+ UNSPEC_LDEXP UNSPEC_FREXP_EXP UNSPEC_FREXP_MANT])
;; }}}
;; {{{ Attributes