AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes.

author Hongyu Wang <hongyu.wang@intel.com>
Fri, 16 Jul 2021 02:01:17 +0000 (10:01 +0800)

committer liuhongt <hongtao.liu@intel.com>
Thu, 23 Sep 2021 09:05:02 +0000 (17:05 +0800)

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c

index dbbf5e3465668fc68c47ca24d07f498468bca3fe..94ac303585ec0510e7bf7de15bf329563acb5256 100644 (file)
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3638,6 +3638,8 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
      return false;
    else if (vector_size == 64)
      return true;
+  else if (GET_MODE_INNER (cmp_mode) == HFmode)
+    return true;
  
    /* When op_true is NULL, op_false must be NULL, or vice versa.  */
    gcc_assert (!op_true == !op_false);
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

index a48c8e8bede2496904302b7fd65492eb24bd81b7..084fc7f46939877c2827b1d2e0f33b9e0960e3b7 100644 (file)
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -989,9 +989,9 @@
     (V16HF "OI") (V8HF "TI")])
  
  (define_mode_attr sseintvecmodelower
-  [(V16SF "v16si") (V8DF "v8di")
-   (V8SF "v8si") (V4DF "v4di")
-   (V4SF "v4si") (V2DF "v2di")
+  [(V32HF "v32hi") (V16SF "v16si") (V8DF "v8di")
+   (V16HF "v16hi") (V8SF "v8si") (V4DF "v4di")
+   (V8HF "v8hi") (V4SF "v4si") (V2DF "v2di")
     (V8SI "v8si") (V4DI "v4di")
     (V4SI "v4si") (V2DI "v2di")
     (V16HI "v16hi") (V8HI "v8hi")
@@ -1568,9 +1568,9 @@
     (set_attr "mode" "<sseinsnmode>")])
  
  (define_insn "<avx512>_store<mode>_mask"
-  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
-       (vec_merge:VI12_AVX512VL
-         (match_operand:VI12_AVX512VL 1 "register_operand" "v")
+  [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
+       (vec_merge:VI12HF_AVX512VL
+         (match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
           (match_dup 0)
           (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
    "TARGET_AVX512BW"
@@ -3810,8 +3810,8 @@
  (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
    [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
         (match_operator:<avx512fmaskmode> 1 ""
-         [(match_operand:V48_AVX512VL 2 "register_operand")
-          (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
+         [(match_operand:V48H_AVX512VL 2 "register_operand")
+          (match_operand:V48H_AVX512VL 3 "nonimmediate_operand")]))]
    "TARGET_AVX512F"
  {
    bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
@@ -4018,6 +4018,51 @@
    DONE;
  })
  
+(define_expand "vcond<mode><mode>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+       (if_then_else:VF_AVX512FP16VL
+         (match_operator 3 ""
+           [(match_operand:VF_AVX512FP16VL 4 "vector_operand")
+            (match_operand:VF_AVX512FP16VL 5 "vector_operand")])
+         (match_operand:VF_AVX512FP16VL 1 "general_operand")
+         (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_fp_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond<mode><sseintvecmodelower>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+       (if_then_else:VF_AVX512FP16VL
+         (match_operator 3 ""
+           [(match_operand:<sseintvecmode> 4 "vector_operand")
+            (match_operand:<sseintvecmode> 5 "vector_operand")])
+         (match_operand:VF_AVX512FP16VL 1 "general_operand")
+         (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond<sseintvecmodelower><mode>"
+  [(set (match_operand:<sseintvecmode> 0 "register_operand")
+       (if_then_else:<sseintvecmode>
+         (match_operator 3 ""
+           [(match_operand:VF_AVX512FP16VL 4 "vector_operand")
+            (match_operand:VF_AVX512FP16VL 5 "vector_operand")])
+         (match_operand:<sseintvecmode> 1 "general_operand")
+         (match_operand:<sseintvecmode> 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_fp_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
  (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
    [(set (match_operand:V48_AVX512VL 0 "register_operand")
         (vec_merge:V48_AVX512VL
@@ -4027,10 +4072,10 @@
    "TARGET_AVX512F")
  
  (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
-       (vec_merge:VI12_AVX512VL
-         (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
-         (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
+  [(set (match_operand:VI12HF_AVX512VL 0 "register_operand")
+       (vec_merge:VI12HF_AVX512VL
+         (match_operand:VI12HF_AVX512VL 1 "nonimmediate_operand")
+         (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand")
           (match_operand:<avx512fmaskmode> 3 "register_operand")))]
    "TARGET_AVX512BW")
  
@@ -15538,6 +15583,21 @@
    DONE;
  })
  
+(define_expand "vcondu<mode><sseintvecmodelower>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+       (if_then_else:VF_AVX512FP16VL
+         (match_operator 3 ""
+           [(match_operand:<sseintvecmode> 4 "vector_operand")
+            (match_operand:<sseintvecmode> 5 "vector_operand")])
+         (match_operand:VF_AVX512FP16VL 1 "general_operand")
+         (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
  (define_expand "vcondeq<VI8F_128:mode>v2di"
    [(set (match_operand:VI8F_128 0 "register_operand")
         (if_then_else:VI8F_128
diff --git a/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C

new file mode 100644 (file)

index 0000000..6d50f49
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vminph" 3 } } */
+/* { dg-final { scan-assembler-times "vmaxph" 3 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define VCONDMINMAX(size, op, name)  \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vminmax_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b)  \
+{ \
+  return (a op b) ? a : b;  \
+}
+
+VCONDMINMAX (8, <, min)
+VCONDMINMAX (8, >, max)
+VCONDMINMAX (16, <, min)
+VCONDMINMAX (16, >, max)
+VCONDMINMAX (32, <, min)
+VCONDMINMAX (32, >, max)
+
diff --git a/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C

new file mode 100644 (file)

index 0000000..de93e2c
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 45 } } */
+/* { dg-final { scan-assembler-times "vpcmpuw" 12 } } */
+/* { dg-final { scan-assembler-times "vpcmpw" 18 } } */
+/* { dg-final { scan-assembler-times "(?:vpblendmw|vmovdqu16\[^\{\n\]+\{%k\[1-7\]\})" 75 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef unsigned short v8uhi __attribute__ ((vector_size (16)));
+typedef unsigned short v16uhi __attribute__ ((vector_size (32)));
+typedef unsigned short v32uhi __attribute__ ((vector_size (64)));
+
+#define VCONDMOV(size, op, name)  \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hi##v##size##hf##name (v##size##hi a, v##size##hi b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##hi \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##hi##name (v##size##hi a, v##size##hi b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (c op d) ? a : b;  \
+} \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##uhi##v##size##hf##name (v##size##uhi a, v##size##uhi b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##uhi \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##uhi##name (v##size##uhi a, v##size##uhi b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (c op d) ? a : b;  \
+} \
+
+VCONDMOV (8, <, lt)
+VCONDMOV (8, >, gt)
+VCONDMOV (8, ==, eq)
+VCONDMOV (8, <=, le)
+VCONDMOV (8, >=, ge)
+VCONDMOV (16, <, lt)
+VCONDMOV (16, >, gt)
+VCONDMOV (16, <=, le)
+VCONDMOV (16, >=, ge)
+VCONDMOV (16, ==, eq)
+VCONDMOV (32, <, lt)
+VCONDMOV (32, >, gt)
+VCONDMOV (32, <=, le)
+VCONDMOV (32, >=, ge)
+VCONDMOV (32, ==, eq)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c

new file mode 100644 (file)

index 0000000..e8745ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 27 } } */
+/* { dg-final { scan-assembler-times "(?:vpcmpw|vpcmpeqw)" 12 } } */
+/* { dg-final { scan-assembler-times "vpcmpuw" 6 } } */
+
+typedef unsigned short u16;
+typedef short s16;
+
+#define CONDMOV_LOOP(size, type, ptype, op, name) \
+void \
+__attribute__ ((noinline, noclone, optimize("tree-vectorize"))) \
+loop_cond_##size##ptype##type##name ( \
+  ptype * restrict a, ptype * restrict b,      \
+  type * restrict c, type * restrict d) \
+{ \
+  int i;  \
+  for (i = 0; i < size; i++)  \
+    { \
+      if (a[i] op b[i])        \
+       d[i] = c[i];  \
+    } \
+}
+
+CONDMOV_LOOP (32, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (32, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (32, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (16, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (16, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (16, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (8, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (8, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (8, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (32, _Float16, s16, <, lt)
+CONDMOV_LOOP (32, _Float16, s16, >, gt)
+CONDMOV_LOOP (32, _Float16, s16, ==, eq)
+CONDMOV_LOOP (16, _Float16, s16, <, lt)
+CONDMOV_LOOP (16, _Float16, s16, >, gt)
+CONDMOV_LOOP (16, _Float16, s16, ==, eq)
+CONDMOV_LOOP (8, _Float16, s16, <, lt)
+CONDMOV_LOOP (8, _Float16, s16, >, gt)
+CONDMOV_LOOP (8, _Float16, s16, ==, eq)
+CONDMOV_LOOP (32, s16, _Float16, <, lt)
+CONDMOV_LOOP (32, s16, _Float16, >, gt)
+CONDMOV_LOOP (32, s16, _Float16, ==, eq)
+CONDMOV_LOOP (16, s16, _Float16, <, lt)
+CONDMOV_LOOP (16, s16, _Float16, >, gt)
+CONDMOV_LOOP (16, s16, _Float16, ==, eq)
+CONDMOV_LOOP (8, s16, _Float16, <, lt)
+CONDMOV_LOOP (8, s16, _Float16, >, gt)
+CONDMOV_LOOP (8, s16, _Float16, ==, eq)
+CONDMOV_LOOP (32, _Float16, u16, <, lt)
+CONDMOV_LOOP (32, _Float16, u16, >, gt)
+CONDMOV_LOOP (32, _Float16, u16, ==, eq)
+CONDMOV_LOOP (16, _Float16, u16, <, lt)
+CONDMOV_LOOP (16, _Float16, u16, >, gt)
+CONDMOV_LOOP (16, _Float16, u16, ==, eq)
+CONDMOV_LOOP (8, _Float16, u16, <, lt)
+CONDMOV_LOOP (8, _Float16, u16, >, gt)
+CONDMOV_LOOP (8, _Float16, u16, ==, eq)
+CONDMOV_LOOP (32, u16, _Float16, <, lt)
+CONDMOV_LOOP (32, u16, _Float16, >, gt)
+CONDMOV_LOOP (32, u16, _Float16, ==, eq)
+CONDMOV_LOOP (16, u16, _Float16, <, lt)
+CONDMOV_LOOP (16, u16, _Float16, >, gt)
+CONDMOV_LOOP (16, u16, _Float16, ==, eq)
+CONDMOV_LOOP (8, u16, _Float16, <, lt)
+CONDMOV_LOOP (8, u16, _Float16, >, gt)
+CONDMOV_LOOP (8, u16, _Float16, ==, eq)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c

new file mode 100644 (file)

index 0000000..a0d5f98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
@@ -0,0 +1,143 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */
+
+static void condmov_test (void);
+#define DO_TEST condmov_test
+#define AVX512FP16
+#define AVX512VL
+#include "avx512f-check.h"
+#include "avx512fp16-vcondmn-loop-1.c"
+
+_Float16 a[32], b[32], c[32], fexp[32], fref[32];
+s16 sa[32], sb[32], sc[32], sexp[32], sref[32];
+u16 ua[32], ub[32], uc[32], uexp[32], uref[32];
+
+#define EMULATE_CONDMOV_LOOP(size, type, ptype, op, name) \
+void \
+__attribute__ ((noinline, noclone)) \
+scalar_cond_##size##ptype##type##name ( \
+  ptype * restrict a, ptype * restrict b,      \
+  type * restrict c, type * restrict d)  \
+{ \
+  int i;  \
+  for (i = 0; i < size; i++)  \
+    { \
+      if (a[i] op b[i])        \
+       d[i] = c[i];  \
+    } \
+}
+
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, ==, eq)
+
+void init()
+{
+  int i;
+  for (i = 0; i < 32; i++)
+    {
+      ua[i] = sa[i] = a[i] = i; 
+      ub[i] = sb[i] = b[i] = i;
+      uc[i] = sc[i] = c[i] = (32 - i) * 2;
+      uexp[i] = sexp[i] = fexp[i] = -1;
+      uref[i] = sref[i] = fref[i] = -1;
+    }
+}
+
+int check_cond(void *a, void *b, int size)
+{
+  int i;
+  u16 *pa = (u16 *)a, *pb = (u16 *)b;
+  for (i = 0; i < size; i++)
+    if (pa[i] != pb[i])
+      return 0;
+  return 1;
+}
+
+#define TEST_CONDMOV_LOOP(size, name)  \
+{ \
+  init ();  \
+  scalar_cond_##size##_Float16_Float16##name (a, b, c, fexp);  \
+  loop_cond_##size##_Float16_Float16##name (a, b, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##_Float16s16##name (a, b, sc, sexp);  \
+  loop_cond_##size##_Float16s16##name (a, b, sc, sref);  \
+  if (!check_cond ((void *)sexp, (void *)sref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##s16_Float16##name (sa, sb, c, fexp);  \
+  loop_cond_##size##s16_Float16##name (sa, sb, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##_Float16u16##name (a, b, uc, uexp);  \
+  loop_cond_##size##_Float16u16##name (a, b, uc, uref);  \
+  if (!check_cond ((void *)uexp, (void *)uref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##u16_Float16##name (ua, ub, c, fexp);  \
+  loop_cond_##size##u16_Float16##name (ua, ub, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+}
+
+static void condmov_test()
+{
+  TEST_CONDMOV_LOOP (32, lt)
+  TEST_CONDMOV_LOOP (32, gt)
+  TEST_CONDMOV_LOOP (32, eq)
+  TEST_CONDMOV_LOOP (16, lt)
+  TEST_CONDMOV_LOOP (16, gt)
+  TEST_CONDMOV_LOOP (16, eq)
+  TEST_CONDMOV_LOOP (8, lt)
+  TEST_CONDMOV_LOOP (8, gt)
+  TEST_CONDMOV_LOOP (8, eq)
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c

new file mode 100644 (file)

index 0000000..ef9f853
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 15 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define VCMPMN(type, op, name) \
+type  \
+__attribute__ ((noinline, noclone)) \
+vec_cmp_##type##type##name (type a, type b) \
+{ \
+  return a op b;  \
+}
+
+VCMPMN (v8hf, <, lt)
+VCMPMN (v16hf, <, lt)
+VCMPMN (v32hf, <, lt)
+VCMPMN (v8hf, <=, le)
+VCMPMN (v16hf, <=, le)
+VCMPMN (v32hf, <=, le)
+VCMPMN (v8hf, >, gt)
+VCMPMN (v16hf, >, gt)
+VCMPMN (v32hf, >, gt)
+VCMPMN (v8hf, >=, ge)
+VCMPMN (v16hf, >=, ge)
+VCMPMN (v32hf, >=, ge)
+VCMPMN (v8hf, ==, eq)
+VCMPMN (v16hf, ==, eq)
+VCMPMN (v32hf, ==, eq)
author	Hongyu Wang <hongyu.wang@intel.com>
	Fri, 16 Jul 2021 02:01:17 +0000 (10:01 +0800)
committer	liuhongt <hongtao.liu@intel.com>
	Thu, 23 Sep 2021 09:05:02 +0000 (17:05 +0800)
gcc/config/i386/i386-expand.c		patch | blob | blame | history
gcc/config/i386/sse.md		patch | blob | blame | history
gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C	[new file with mode: 0644]	patch | blob
gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c	[new file with mode: 0644]	patch | blob