(define_mode_iterator VHF_32_64 [V2HF (V4HF "TARGET_MMX_WITH_SSE")])
+;; BF16 counterpart of VHF_32_64: iterates over V2BF and V4BF, with V4BF
+;; enabled only when TARGET_MMX_WITH_SSE holds.
+(define_mode_iterator VBF_32_64 [V2BF (V4BF "TARGET_MMX_WITH_SSE")])
+
(define_expand "divv4hf3"
[(set (match_operand:V4HF 0 "register_operand")
(div:V4HF
DONE;
})
+;; Expand V2BF/V4BF plus, minus, mult and div by performing the operation
+;; in V8BF and taking the low part of the result.
+;; VDIVNEPBF16 does not generate floating point exceptions.
+;; NOTE(review): presumably this is why the contents of the unused upper
+;; V8BF elements need no special handling here -- confirm.
+(define_expand "<insn><mode>3"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+        (plusminusmultdiv:VBF_32_64
+          (match_operand:VBF_32_64 1 "nonimmediate_operand")
+          (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
+  "TARGET_AVX10_2_256"
+{
+  /* Fresh V8BF pseudo that receives the full-width result.  */
+  rtx wide_dest = gen_reg_rtx (V8BFmode);
+
+  /* Force each input into a register, then view it as V8BF.  */
+  rtx lhs = force_reg (<MODE>mode, operands[1]);
+  rtx wide_lhs = lowpart_subreg (V8BFmode, lhs, <MODE>mode);
+  rtx rhs = force_reg (<MODE>mode, operands[2]);
+  rtx wide_rhs = lowpart_subreg (V8BFmode, rhs, <MODE>mode);
+
+  emit_insn (gen_<insn>v8bf3 (wide_dest, wide_lhs, wide_rhs));
+
+  /* Copy the low <MODE> part of the V8BF result into operand 0.  */
+  rtx narrow_result = lowpart_subreg (<MODE>mode, wide_dest, V8BFmode);
+  emit_move_insn (operands[0], narrow_result);
+  DONE;
+})
+
(define_expand "divv2hf3"
[(set (match_operand:V2HF 0 "register_operand")
(div:V2HF
DONE;
})
+;; Expand V2BF/V4BF square root by performing the V8BF square root and
+;; taking the low part of the result.
+(define_expand "sqrt<mode>2"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+        (sqrt:VBF_32_64 (match_operand:VBF_32_64 1 "vector_operand")))]
+  "TARGET_AVX10_2_256"
+{
+  /* Fresh V8BF pseudo that receives the full-width result.  */
+  rtx wide_dest = gen_reg_rtx (V8BFmode);
+
+  /* Force the input into a register, then view it as V8BF.  */
+  rtx src = force_reg (<MODE>mode, operands[1]);
+  rtx wide_src = lowpart_subreg (V8BFmode, src, <MODE>mode);
+
+  emit_insn (gen_sqrtv8bf2 (wide_dest, wide_src));
+
+  /* Copy the low <MODE> part of the V8BF result into operand 0.  */
+  rtx narrow_result = lowpart_subreg (<MODE>mode, wide_dest, V8BFmode);
+  emit_move_insn (operands[0], narrow_result);
+  DONE;
+})
+
(define_expand "<code><mode>2"
[(set (match_operand:VHF_32_64 0 "register_operand")
(absneg:VHF_32_64
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx10.2 -O2" } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vrcppbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+
+typedef __bf16 v4bf __attribute__ ((__vector_size__ (8)));
+typedef __bf16 v2bf __attribute__ ((__vector_size__ (4)));
+
+
+/* Divide two 8-byte __bf16 vectors with fast-math enabled for this
+   function only.  The dg-final directives in this file scan for
+   vrcppbf16 and vmulnepbf16 on xmm registers.  */
+__attribute__((optimize("fast-math")))
+v4bf
+foo_div_fast_math_4 (v4bf a, v4bf b)
+{
+ return a / b;
+}
+
+/* Divide two 4-byte __bf16 vectors with fast-math enabled for this
+   function only.  The dg-final directives in this file scan for
+   vrcppbf16 and vmulnepbf16 on xmm registers.  */
+__attribute__((optimize("fast-math")))
+v2bf
+foo_div_fast_math_2 (v2bf a, v2bf b)
+{
+ return a / b;
+}
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx10.2 -O2" } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+
+typedef __bf16 v4bf __attribute__ ((__vector_size__ (8)));
+typedef __bf16 v2bf __attribute__ ((__vector_size__ (4)));
+
+/* Element-wise multiply of two 8-byte __bf16 vectors; presumably one of
+   the two vmulnepbf16 matches this file scans for.  */
+v4bf
+foo_mul_4 (v4bf a, v4bf b)
+{
+ return a * b;
+}
+
+/* Element-wise add of two 8-byte __bf16 vectors; presumably one of the
+   two vaddnepbf16 matches this file scans for.  */
+v4bf
+foo_add_4 (v4bf a, v4bf b)
+{
+ return a + b;
+}
+
+/* Element-wise divide of two 8-byte __bf16 vectors (no fast-math);
+   presumably one of the two vdivnepbf16 matches this file scans for.  */
+v4bf
+foo_div_4 (v4bf a, v4bf b)
+{
+ return a / b;
+}
+
+/* Element-wise subtract of two 8-byte __bf16 vectors; presumably one of
+   the two vsubnepbf16 matches this file scans for.  */
+v4bf
+foo_sub_4 (v4bf a, v4bf b)
+{
+ return a - b;
+}
+
+/* Element-wise multiply of two 4-byte __bf16 vectors; presumably one of
+   the two vmulnepbf16 matches this file scans for.  */
+v2bf
+foo_mul_2 (v2bf a, v2bf b)
+{
+ return a * b;
+}
+
+/* Element-wise add of two 4-byte __bf16 vectors; presumably one of the
+   two vaddnepbf16 matches this file scans for.  */
+v2bf
+foo_add_2 (v2bf a, v2bf b)
+{
+ return a + b;
+}
+
+/* Element-wise divide of two 4-byte __bf16 vectors (no fast-math);
+   presumably one of the two vdivnepbf16 matches this file scans for.  */
+v2bf
+foo_div_2 (v2bf a, v2bf b)
+{
+ return a / b;
+}
+
+/* Element-wise subtract of two 4-byte __bf16 vectors; presumably one of
+   the two vsubnepbf16 matches this file scans for.  */
+v2bf
+foo_sub_2 (v2bf a, v2bf b)
+{
+ return a - b;
+}