;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr mmxintvecmode
[(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
- (V4HF "V4HI") (V2HF "V2HI")])
+ (V4HF "V4HI") (V2HF "V2HI") (V4BF "V4HI") (V2BF "V2HI")])
+;; V4BF/V2BF map to the same HI-vector integer modes as V4HF/V2HF:
+;; bf16 elements are also 16 bits wide, so sign-bit manipulation
+;; (abs/neg/copysign below) can reuse the HImode-vector forms.
(define_mode_attr mmxintvecmodelower
[(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")
DONE;
})
+;; 32- and 64-bit partial vectors of 16-bit floats, now covering both
+;; _Float16 (V2HF/V4HF) and __bf16 (V2BF/V4BF) elements.  The 64-bit
+;; variants require MMX-in-SSE-registers (TARGET_MMX_WITH_SSE), the same
;; condition the old VHF_32_64 iterator used for V4HF.
+(define_mode_iterator VHBF_32_64
+ [V2BF (V4BF "TARGET_MMX_WITH_SSE")
+ V2HF (V4HF "TARGET_MMX_WITH_SSE")])
+
+;; abs<mode>2 / neg<mode>2 for partial HF/BF vectors.  Both operations
+;; only touch the sign bit, so plain TARGET_SSE suffices; the heavy
+;; lifting (building the sign mask and choosing and/andn/xor) is done by
+;; ix86_expand_fp_absneg_operator.
(define_expand "<code><mode>2"
- [(set (match_operand:VHF_32_64 0 "register_operand")
- (absneg:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand")))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand")
+ (absneg:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand")))]
"TARGET_SSE"
"ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+;; Pre-reload placeholder for partial-vector HF/BF abs/neg; split after
+;; reload into the mask-based logic insns.  NOTE(review): operand 2 is an
+;; extra input consumed by the (elided) split body — presumably the
+;; sign-bit mask constant; confirm against the unchanged split pattern.
(define_insn_and_split "*mmx_<code><mode>"
- [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
- (absneg:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "0,x,x")))
- (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x,x")
+ (absneg:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "0,x,x")))
+ (use (match_operand:VHBF_32_64 2 "register_operand" "x,0,x"))]
"TARGET_SSE"
"#"
"&& reload_completed"
[(set_attr "isa" "noavx,noavx,avx")])
+;; Negated-absolute-value (-|x|) for partial HF/BF vectors: sets the sign
+;; bit unconditionally.  Same split-after-reload scheme as *mmx_<code><mode>
+;; above, with operand 2 as the extra mask input used by the split.
(define_insn_and_split "*mmx_nabs<mode>2"
- [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
- (neg:VHF_32_64
- (abs:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "0,x,x"))))
- (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x,x")
+ (neg:VHBF_32_64
+ (abs:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "0,x,x"))))
+ (use (match_operand:VHBF_32_64 2 "register_operand" "x,0,x"))]
"TARGET_SSE"
"#"
"&& reload_completed"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; AND-NOT (~op1 & op2) on partial HF/BF vectors, performed in the full
+;; 128-bit register with andnps (further alternatives elided from this
+;; hunk); harmless for the unused upper lanes.
(define_insn "*mmx_andnot<mode>3"
- [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x")
- (and:VHF_32_64
- (not:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "0,x"))
- (match_operand:VHF_32_64 2 "register_operand" "x,x")))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x")
+ (and:VHBF_32_64
+ (not:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "0,x"))
+ (match_operand:VHBF_32_64 2 "register_operand" "x,x")))]
"TARGET_SSE"
"@
andnps\t{%2, %0|%0, %2}
(set_attr "mode" "V4SF")])
+;; and/ior/xor on partial HF/BF vectors via the packed-single logic
+;; instructions (<logic>ps); operand 1 is commutative ("%0,x").
(define_insn "<code><mode>3"
- [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x")
- (any_logic:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "%0,x")
- (match_operand:VHF_32_64 2 "register_operand" " x,x")))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x")
+ (any_logic:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "%0,x")
+ (match_operand:VHBF_32_64 2 "register_operand" " x,x")))]
"TARGET_SSE"
"@
<logic>ps\t{%2, %0|%0, %2}
+;; copysign<mode>3 = (op1 & ~signmask) | (op2 & signmask): magnitude from
+;; operand 1, sign from operand 2.  Operand 3 is the sign-bit mask built
+;; below; operands 4/5 are scratch temporaries (generation elided here).
(define_expand "copysign<mode>3"
[(set (match_dup 4)
- (and:VHF_32_64
- (not:VHF_32_64 (match_dup 3))
- (match_operand:VHF_32_64 1 "register_operand")))
+ (and:VHBF_32_64
+ (not:VHBF_32_64 (match_dup 3))
+ (match_operand:VHBF_32_64 1 "register_operand")))
(set (match_dup 5)
- (and:VHF_32_64 (match_dup 3)
- (match_operand:VHF_32_64 2 "register_operand")))
- (set (match_operand:VHF_32_64 0 "register_operand")
- (ior:VHF_32_64 (match_dup 4) (match_dup 5)))]
+ (and:VHBF_32_64 (match_dup 3)
+ (match_operand:VHBF_32_64 2 "register_operand")))
+ (set (match_operand:VHBF_32_64 0 "register_operand")
+ (ior:VHBF_32_64 (match_dup 4) (match_dup 5)))]
"TARGET_SSE"
{
operands[3] = ix86_build_signbit_mask (<MODE>mode, true, false);
+;; xorsign<mode>3 = op1 ^ (op2 & signmask): flips operand 1's sign where
+;; operand 2 is negative.  Operand 3 is the sign-bit mask built below;
+;; operand 4 is a scratch temporary (generation elided from this hunk).
(define_expand "xorsign<mode>3"
[(set (match_dup 4)
- (and:VHF_32_64 (match_dup 3)
- (match_operand:VHF_32_64 2 "register_operand")))
- (set (match_operand:VHF_32_64 0 "register_operand")
- (xor:VHF_32_64 (match_dup 4)
- (match_operand:VHF_32_64 1 "register_operand")))]
+ (and:VHBF_32_64 (match_dup 3)
+ (match_operand:VHBF_32_64 2 "register_operand")))
+ (set (match_operand:VHBF_32_64 0 "register_operand")
+ (xor:VHBF_32_64 (match_dup 4)
+ (match_operand:VHBF_32_64 1 "register_operand")))]
"TARGET_SSE"
{
operands[3] = ix86_build_signbit_mask (<MODE>mode, true, false);
[(set (match_operand:<mmxintvecmode> 0 "register_operand")
(lshiftrt:<mmxintvecmode>
(subreg:<mmxintvecmode>
- (match_operand:VHF_32_64 1 "register_operand") 0)
+ (match_operand:VHBF_32_64 1 "register_operand") 0)
(match_dup 2)))]
"TARGET_SSE2"
{
--- /dev/null
+/* Runtime test: abs/neg on partial vectors of __bf16 must both compute
+   correct results and vectorize (2 elements -> 4-byte vectors via SLP,
+   4 elements -> 8-byte vectors via loop vectorization; see the dg-final
+   scans at the end of the file).  */
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-vect-details -fdump-tree-slp-details -fdump-tree-optimized" } */
+
+extern void abort (void);
+static void do_test (void);
+
+/* DO_TEST and AVX512BF16 are consumed by avx512-check.h, which runs
+   do_test only after checking the host CPU for the required feature.
+   NOTE(review): confirm AVX512BF16 is the intended runtime gate here.  */
+#define DO_TEST do_test
+#define AVX512BF16
+#include "avx512-check.h"
+
+/* Inputs (b_*) and abs/neg outputs (r_*): 2-element (32-bit vector) and
+   4-element (64-bit vector) cases.  */
+__bf16 b_32[2], r_abs_32[2], r_neg_32[2];
+__bf16 b_64[4], r_abs_64[4], r_neg_64[4];
+
+/* |x| over 2 bf16 elements; counted by the "4 byte vectors" slp1 scan.
+   noipa/no_icf keep the kernel intact and distinct for the dump scans.
+   NOTE(review): __builtin_fabsf16 takes _Float16, so b_32[i] is converted;
+   the abs is expected to fold back to a bf16 ABS_EXPR (checked by the
+   "optimized" dump scan) — confirm.  */
+void
+__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf))
+abs_32 (void)
+{
+ for (int i = 0; i < 2; i++)
+ r_abs_32[i] = __builtin_fabsf16 (b_32[i]);
+}
+
+/* -x over 2 bf16 elements; together with abs_32 this accounts for the
+   two "4 byte vectors" SLP instances the slp1 scan expects.  */
+void
+__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf))
+neg_32 (void)
+{
+ for (int i = 0; i < 2; i++)
+ r_neg_32[i] = -b_32[i];
+}
+
+/* |x| over 4 bf16 elements; loop-vectorized with 8-byte vectors on
+   non-ia32 targets (V4BF needs TARGET_MMX_WITH_SSE), per the "vect"
+   dump scan's { ! ia32 } guard.  */
+void
+__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf))
+abs_64 (void)
+{
+ for (int i = 0; i < 4; i++)
+ r_abs_64[i] = __builtin_fabsf16 (b_64[i]);
+}
+
+/* -x over 4 bf16 elements; the second of the two 8-byte-vector loops
+   counted by the "vect" dump scan.  */
+void
+__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf))
+neg_64 (void)
+{
+ for (int i = 0; i < 4; i++)
+ r_neg_64[i] = -b_64[i];
+}
+
+/* Compare the vectorized kernels' outputs against scalar recomputation
+   of abs and neg for the first LEN elements; abort on the first
+   mismatch.  Compiled at plain -O1 (no optimize("O2") attribute), so it
+   should not itself contribute to the vectorization dump counts.  */
+void
+check_absneg_results (__bf16 *b, __bf16 *r_abs, __bf16 *r_neg, int len)
+{
+ for (int i = 0; i < len; i++)
+ {
+ __bf16 expected_abs = __builtin_fabsf16 (b[i]);
+ __bf16 expected_neg = -b[i];
+ if (r_abs[i] != expected_abs || r_neg[i] != expected_neg)
+ abort ();
+ }
+}
+
+/* Test driver invoked by avx512-check.h after the CPU-feature check:
+   fill the bf16 inputs from float constants, run the four kernels, and
+   verify every abs/neg result element-wise (check_absneg_results aborts
+   on mismatch).  */
+static void
+__attribute__ ((noinline, noclone))
+do_test (void)
+{
+  /* Four distinct source values suffice: b_32 uses the first two,
+     b_64 all four.  (Was float_b[16] with 12 zero tail elements —
+     leftover from a wider variant of this test; indices 4..15 were
+     never read.)  */
+  float float_b[4] = {-1.2f, 3.4f, -5.6f, 7.8f};
+
+  for (int i = 0; i < 2; i++)
+    b_32[i] = (__bf16) float_b[i];
+
+  for (int i = 0; i < 4; i++)
+    b_64[i] = (__bf16) float_b[i];
+
+  abs_32 ();
+  neg_32 ();
+  check_absneg_results (b_32, r_abs_32, r_neg_32, 2);
+
+  abs_64 ();
+  neg_64 ();
+  check_absneg_results (b_64, r_abs_64, r_neg_64, 4);
+}
+
+/* Expected vectorization evidence: two 4-byte SLP instances (abs_32 and
+   neg_32), two 8-byte vectorized loops (abs_64 and neg_64, 64-bit
+   targets only), and two vector ABS_EXPRs (the abs kernels) surviving
+   into the optimized dump.  */
+/* { dg-final { scan-tree-dump-times "vectorized using 4 byte vectors" 2 "slp1" } } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 8 byte vectors" 2 "vect" { target { ! ia32 } } } } */
+/* { dg-final { scan-tree-dump-times {(?n)ABS_EXPR <vect} 2 "optimized" { target { ! ia32 } } } } */