i386: Support partial signbit/xorsign/copysign/abs/neg/and/xor/ior/andn for V2BF/V4BF

author Levy Hsu <admin@levyhsu.com>

Mon, 2 Sep 2024 03:57:46 +0000 (13:27 +0930)

committer Levy Hsu <admin@levyhsu.com>

Thu, 5 Sep 2024 01:40:25 +0000 (01:40 +0000)
author Levy Hsu <admin@levyhsu.com>
Mon, 2 Sep 2024 03:57:46 +0000 (13:27 +0930)
committer Levy Hsu <admin@levyhsu.com>
Thu, 5 Sep 2024 01:40:25 +0000 (01:40 +0000)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc

index c18a2647c2a4da5c6667cc16a37bd577d4f956f6..707b75a6d5dbebc607f3ce56b33e558aa7827446 100644 (file)
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -16187,6 +16187,8 @@ ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
      case E_V32BFmode:
      case E_V16BFmode:
      case E_V8BFmode:
+    case E_V4BFmode:
+    case E_V2BFmode:
        n_elt = GET_MODE_NUNITS (mode);
        v = rtvec_alloc (n_elt);
        scalar_mode = GET_MODE_INNER (mode);
@@ -16226,6 +16228,8 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
      case E_V32BFmode:
      case E_V16BFmode:
      case E_V8BFmode:
+    case E_V4BFmode:
+    case E_V2BFmode:
        vec_mode = mode;
        imode = HImode;
        break;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md

index fac90cfd4d4cc34ca8d8e4c9bc0bf55aedfc1be9..0cfa9bdabc3508c498fd2cea4f954b5704ed1ff1 100644 (file)
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -121,7 +121,7 @@
  ;; Mapping of vector float modes to an integer mode of the same size
  (define_mode_attr mmxintvecmode
    [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
-   (V4HF "V4HI") (V2HF "V2HI")])
+   (V4HF "V4HI") (V2HF "V2HI") (V4BF "V4HI") (V2BF "V2HI")])
  
  (define_mode_attr mmxintvecmodelower
    [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")
@@ -2147,18 +2147,22 @@
    DONE;
  })
  
+(define_mode_iterator VHBF_32_64 ;; 2- and 4-element BF and HF vector modes
+ [V2BF (V4BF "TARGET_MMX_WITH_SSE")
+  V2HF (V4HF "TARGET_MMX_WITH_SSE")])
+
  (define_expand "<code><mode>2"
-  [(set (match_operand:VHF_32_64 0 "register_operand")
-       (absneg:VHF_32_64
-         (match_operand:VHF_32_64 1 "register_operand")))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand")
+       (absneg:VHBF_32_64
+         (match_operand:VHBF_32_64 1 "register_operand")))]
    "TARGET_SSE"
    "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
  
  (define_insn_and_split "*mmx_<code><mode>"
-  [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
-       (absneg:VHF_32_64
-         (match_operand:VHF_32_64 1 "register_operand" "0,x,x")))
-   (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x,x")
+       (absneg:VHBF_32_64
+         (match_operand:VHBF_32_64 1 "register_operand" "0,x,x")))
+   (use (match_operand:VHBF_32_64 2 "register_operand" "x,0,x"))]
    "TARGET_SSE"
    "#"
    "&& reload_completed"
@@ -2171,11 +2175,11 @@
    [(set_attr "isa" "noavx,noavx,avx")])
  
  (define_insn_and_split "*mmx_nabs<mode>2"
-  [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
-       (neg:VHF_32_64
-         (abs:VHF_32_64
-           (match_operand:VHF_32_64 1 "register_operand" "0,x,x"))))
-   (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x,x")
+       (neg:VHBF_32_64
+         (abs:VHBF_32_64
+           (match_operand:VHBF_32_64 1 "register_operand" "0,x,x"))))
+   (use (match_operand:VHBF_32_64 2 "register_operand" "x,0,x"))]
    "TARGET_SSE"
    "#"
    "&& reload_completed"
@@ -2466,11 +2470,11 @@
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  
  (define_insn "*mmx_andnot<mode>3"
-  [(set (match_operand:VHF_32_64 0 "register_operand"    "=x,x")
-       (and:VHF_32_64
-         (not:VHF_32_64
-           (match_operand:VHF_32_64 1 "register_operand" "0,x"))
-         (match_operand:VHF_32_64 2 "register_operand"   "x,x")))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand"    "=x,x")
+       (and:VHBF_32_64
+         (not:VHBF_32_64
+           (match_operand:VHBF_32_64 1 "register_operand" "0,x"))
+         (match_operand:VHBF_32_64 2 "register_operand"   "x,x")))]
    "TARGET_SSE"
    "@
     andnps\t{%2, %0|%0, %2}
@@ -2481,10 +2485,10 @@
     (set_attr "mode" "V4SF")])
  
  (define_insn "<code><mode>3"
-  [(set (match_operand:VHF_32_64 0 "register_operand"   "=x,x")
-       (any_logic:VHF_32_64
-         (match_operand:VHF_32_64 1 "register_operand" "%0,x")
-         (match_operand:VHF_32_64 2 "register_operand" " x,x")))]
+  [(set (match_operand:VHBF_32_64 0 "register_operand"   "=x,x")
+       (any_logic:VHBF_32_64
+         (match_operand:VHBF_32_64 1 "register_operand" "%0,x")
+         (match_operand:VHBF_32_64 2 "register_operand" " x,x")))]
    "TARGET_SSE"
    "@
     <logic>ps\t{%2, %0|%0, %2}
@@ -2496,14 +2500,14 @@
  
  (define_expand "copysign<mode>3"
    [(set (match_dup 4)
-       (and:VHF_32_64
-         (not:VHF_32_64 (match_dup 3))
-         (match_operand:VHF_32_64 1 "register_operand")))
+       (and:VHBF_32_64
+         (not:VHBF_32_64 (match_dup 3))
+         (match_operand:VHBF_32_64 1 "register_operand")))
     (set (match_dup 5)
-       (and:VHF_32_64 (match_dup 3)
-                 (match_operand:VHF_32_64 2 "register_operand")))
-   (set (match_operand:VHF_32_64 0 "register_operand")
-       (ior:VHF_32_64 (match_dup 4) (match_dup 5)))]
+       (and:VHBF_32_64 (match_dup 3)
+                 (match_operand:VHBF_32_64 2 "register_operand")))
+   (set (match_operand:VHBF_32_64 0 "register_operand")
+       (ior:VHBF_32_64 (match_dup 4) (match_dup 5)))]
    "TARGET_SSE"
  {
    operands[3] = ix86_build_signbit_mask (<MODE>mode, true, false);
@@ -2514,11 +2518,11 @@
  
  (define_expand "xorsign<mode>3"
    [(set (match_dup 4)
-       (and:VHF_32_64 (match_dup 3)
-                 (match_operand:VHF_32_64 2 "register_operand")))
-   (set (match_operand:VHF_32_64 0 "register_operand")
-       (xor:VHF_32_64 (match_dup 4)
-                 (match_operand:VHF_32_64 1 "register_operand")))]
+       (and:VHBF_32_64 (match_dup 3)
+                 (match_operand:VHBF_32_64 2 "register_operand")))
+   (set (match_operand:VHBF_32_64 0 "register_operand")
+       (xor:VHBF_32_64 (match_dup 4)
+                 (match_operand:VHBF_32_64 1 "register_operand")))]
    "TARGET_SSE"
  {
    operands[3] = ix86_build_signbit_mask (<MODE>mode, true, false);
@@ -2530,7 +2534,7 @@
    [(set (match_operand:<mmxintvecmode> 0 "register_operand")
         (lshiftrt:<mmxintvecmode>
           (subreg:<mmxintvecmode>
-           (match_operand:VHF_32_64 1 "register_operand") 0)
+           (match_operand:VHBF_32_64 1 "register_operand") 0)
           (match_dup 2)))]
    "TARGET_SSE2"
  {
diff --git a/gcc/testsuite/gcc.target/i386/part-vect-absnegbf.c b/gcc/testsuite/gcc.target/i386/part-vect-absnegbf.c

new file mode 100644 (file)

index 0000000..2d7ae35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/part-vect-absnegbf.c
@@ -0,0 +1,81 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-vect-details -fdump-tree-slp-details -fdump-tree-optimized" } */
+
+extern void abort (void);
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512BF16
+#include "avx512-check.h"
+
+__bf16 b_32[2], r_abs_32[2], r_neg_32[2];
+__bf16 b_64[4], r_abs_64[4], r_neg_64[4];
+/* r_abs_32[i] = __builtin_fabsf16 (b_32[i]) for the 2 elements of b_32.  */
+void
+__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf))
+abs_32 (void)
+{
+  for (int i = 0; i < 2; i++)
+    r_abs_32[i] = __builtin_fabsf16 (b_32[i]);
+}
+/* r_neg_32[i] = -b_32[i] for the 2 elements of b_32.  */
+void
+__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf))
+neg_32 (void)
+{
+  for (int i = 0; i < 2; i++)
+    r_neg_32[i] = -b_32[i];
+}
+/* r_abs_64[i] = __builtin_fabsf16 (b_64[i]) for the 4 elements of b_64.  */
+void
+__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf))
+abs_64 (void)
+{
+  for (int i = 0; i < 4; i++)
+    r_abs_64[i] = __builtin_fabsf16 (b_64[i]);
+}
+/* r_neg_64[i] = -b_64[i] for the 4 elements of b_64.  */
+void
+__attribute__((optimize ("O2"), noinline, noipa, noclone, no_icf))
+neg_64 (void)
+{
+  for (int i = 0; i < 4; i++)
+    r_neg_64[i] = -b_64[i];
+}
+/* Abort unless r_abs[i] and r_neg[i] match the values recomputed from b[i], for all i < len.  */
+void
+check_absneg_results (__bf16 *b, __bf16 *r_abs, __bf16 *r_neg, int len)
+{
+  for (int i = 0; i < len; i++)
+    {
+      __bf16 expected_abs = __builtin_fabsf16 (b[i]); /* same builtin as abs_32/abs_64 */
+      __bf16 expected_neg = -b[i];
+      if (r_abs[i] != expected_abs || r_neg[i] != expected_neg)
+        abort ();
+    }
+}
+/* Entry point named by DO_TEST: fill the inputs, run each kernel, check the results.  */
+static void
+__attribute__ ((noinline, noclone))
+do_test (void)
+{
+  float float_b[16] = {-1.2f, 3.4f, -5.6f, 7.8f}; /* only [0..3] are read below */
+  /* b_32 receives the first two values, converted to __bf16.  */
+  for (int i = 0; i < 2; i++)
+    b_32[i] = (__bf16) float_b[i];
+  /* b_64 receives all four values, converted to __bf16.  */
+  for (int i = 0; i < 4; i++)
+    b_64[i] = (__bf16) float_b[i];
+  /* Run and verify the 2-element kernels.  */
+  abs_32 ();
+  neg_32 ();
+  check_absneg_results (b_32, r_abs_32, r_neg_32, 2);
+  /* Run and verify the 4-element kernels.  */
+  abs_64 ();
+  neg_64 ();
+  check_absneg_results (b_64, r_abs_64, r_neg_64, 4);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized using 4 byte vectors" 2 "slp1" } } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 8 byte vectors" 2 "vect" { target { ! ia32 } } } } */
+/* { dg-final { scan-tree-dump-times {(?n)ABS_EXPR <vect} 2 "optimized" { target { ! ia32 } } } } */
author	Levy Hsu <admin@levyhsu.com>
	Mon, 2 Sep 2024 03:57:46 +0000 (13:27 +0930)
committer	Levy Hsu <admin@levyhsu.com>
	Thu, 5 Sep 2024 01:40:25 +0000 (01:40 +0000)
gcc/config/i386/i386.cc		patch \| blob \| blame \| history
gcc/config/i386/mmx.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/i386/part-vect-absnegbf.c	[new file with mode: 0644]	patch \| blob