]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
AVX512FP16: Add vcvttsh2si/vcvttsh2usi.
authorliuhongt <hongtao.liu@intel.com>
Fri, 15 Mar 2019 06:17:54 +0000 (14:17 +0800)
committerliuhongt <hongtao.liu@intel.com>
Fri, 17 Sep 2021 08:04:29 +0000 (16:04 +0800)
gcc/ChangeLog:

* config/i386/avx512fp16intrin.h (_mm_cvttsh_i32):
New intrinsic.
(_mm_cvttsh_u32): Likewise.
(_mm_cvtt_roundsh_i32): Likewise.
(_mm_cvtt_roundsh_u32): Likewise.
(_mm_cvttsh_i64): Likewise.
(_mm_cvttsh_u64): Likewise.
(_mm_cvtt_roundsh_i64): Likewise.
(_mm_cvtt_roundsh_u64): Likewise.
* config/i386/i386-builtin.def: Add corresponding new builtins.
* config/i386/sse.md
(avx512fp16_fix<fixunssuffix>_trunc<mode>2<round_saeonly_name>):
New.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512fp16-vcvttsh2si-1a.c: New test.
* gcc.target/i386/avx512fp16-vcvttsh2si-1b.c: Ditto.
* gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c: Ditto.
* gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c: Ditto.
* gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c: Ditto.
* gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c: Ditto.
* gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c: Ditto.
* gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c: Ditto.
* gcc.target/i386/avx-1.c: Add test for new builtins.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/sse-14.c: Add test for new intrinsics.
* gcc.target/i386/sse-22.c: Ditto.

16 files changed:
gcc/config/i386/avx512fp16intrin.h
gcc/config/i386/i386-builtin.def
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx-1.c
gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse-13.c
gcc/testsuite/gcc.target/i386/sse-14.c
gcc/testsuite/gcc.target/i386/sse-22.c
gcc/testsuite/gcc.target/i386/sse-23.c

index 762fc8d721665cb98c3d75820badcc47be3d99ea..d5fe49b9b7c2368abda9be6675504dd605765941 100644 (file)
@@ -4154,6 +4154,87 @@ _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
 #endif /* __OPTIMIZE__ */
 #endif /* __x86_64__ */
 
+/* Intrinsics vcvttsh2si, vcvttsh2us.  */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_i32 (__m128h __A)
+{
+  return (int)
+    __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_u32 (__m128h __A)
+{
+  return (int)
+    __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
+}
+
+#else
+#define _mm_cvtt_roundsh_i32(A, B)             \
+  ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
+#define _mm_cvtt_roundsh_u32(A, B)             \
+  ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_i64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_u64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
+}
+
+#else
+#define _mm_cvtt_roundsh_i64(A, B)                     \
+  ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
+#define _mm_cvtt_roundsh_u64(A, B)                     \
+  ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
+
 /* Intrinsics vcvtsi2sh, vcvtusi2sh.  */
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
index 176a3783ad90a28ef6286202564c6a1e970db68f..fb36707bf55bac16a2644da697b2472c136516b1 100644 (file)
@@ -3116,6 +3116,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__b
 BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
 BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usiq_round, "__builtin_ia32_vcvtsh2usi64_round", IX86_BUILTIN_VCVTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncsi2_round, "__builtin_ia32_vcvttsh2si32_round", IX86_BUILTIN_VCVTTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncdi2_round, "__builtin_ia32_vcvttsh2si64_round", IX86_BUILTIN_VCVTTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncsi2_round, "__builtin_ia32_vcvttsh2usi32_round", IX86_BUILTIN_VCVTTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncdi2_round, "__builtin_ia32_vcvttsh2usi64_round", IX86_BUILTIN_VCVTTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__builtin_ia32_vcvtsi2sh32_round", IX86_BUILTIN_VCVTSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_INT)
 BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT)
index 60a52b23611465ca5a95ab515634489d14c091a1..fc606628b941e7ca23d59028a1fa30e1a79dfb26 100644 (file)
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<round_saeonly_name>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+       (any_fix:SWI48
+         (vec_select:HF
+           (match_operand:V8HF 1 "register_operand" "v")
+           (parallel [(const_int 0)]))))]
+  "TARGET_AVX512FP16"
+  "%vcvttsh2<fixsuffix>si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2_mem"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+       (any_fix:SWI48
+         (match_operand:HF 1 "memory_operand" "vm")))]
+  "TARGET_AVX512FP16"
+  "%vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point conversion operations
index bd6b9df5842cbbb6f77cc615df6988d999d546e4..65796841d9874072f682e5d1890be72c68463193 100644 (file)
 #define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8)
+#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8)
 #define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
 #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
 #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c
new file mode 100644 (file)
index 0000000..80d84fc
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128h x1;
+volatile int res1;
+
+void extern
+avx512f_test (void)
+{
+  res1 = _mm_cvttsh_i32 (x1);
+  res1 = _mm_cvtt_roundsh_i32 (x1, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c
new file mode 100644 (file)
index 0000000..c5b0a64
--- /dev/null
@@ -0,0 +1,54 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+#define N_ELEMS 2
+
+void NOINLINE
+emulate_cvtph2_d(V512 * dest, V512 op1,
+                __mmask32 k, int zero_mask)
+{
+  V512 v1, v2, v3, v4, v5, v6, v7, v8;
+  int i;
+  __mmask16 m1, m2;
+
+  m1 = k & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+
+  for (i = 0; i < 16; i++) {
+    if (((1 << i) & m1) == 0) {
+      if (zero_mask) {
+       v5.u32[i] = 0;
+      }
+      else {
+       v5.u32[i] = dest->u32[i];
+      }
+    }
+    else {
+      v5.u32[i] = v1.f32[i];
+
+    }
+  }
+  *dest = v5;
+}
+
+void
+test_512 (void)
+{
+  V512 res;
+  V512 exp;
+  
+  init_src();
+  emulate_cvtph2_d(&exp, src1,  NET_MASK, 0);
+  res.i32[0] = _mm_cvtt_roundsh_i32(src1.xmmh[0], _ROUND_NINT);
+  check_results(&res, &exp, N_ELEMS, "_mm_cvtt_roundsh_i32");
+
+  if (n_errs != 0) {
+      abort ();
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c
new file mode 100644 (file)
index 0000000..76a9053
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128h x1;
+volatile long long res2;
+
+void extern
+avx512f_test (void)
+{
+  res2 = _mm_cvttsh_i64 (x1);
+  res2 = _mm_cvtt_roundsh_i64 (x1, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c
new file mode 100644 (file)
index 0000000..4e0fe5b
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-do run { target { { ! ia32 } && avx512fp16 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+#define N_ELEMS 4
+
+void NOINLINE
+emulate_cvtph2_q(V512 * dest, V512 op1,
+                __mmask32 k, int zero_mask)
+{
+  V512 v1, v2, v3, v4, v5, v6, v7, v8;
+  int i;
+  __mmask16 m1, m2;
+
+  m1 = k & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+
+  for (i = 0; i < 8; i++) {
+    if (((1 << i) & m1) == 0) {
+      if (zero_mask) {
+       v5.u64[i] = 0;
+      }
+      else {
+       v5.u64[i] = dest->u64[i];
+      }
+    }
+    else {
+      v5.u64[i] = v1.f32[i];
+    }
+  }
+  *dest = v5;
+}
+
+void
+test_512 (void)
+{
+  V512 res;
+  V512 exp;
+  
+  init_src();
+  emulate_cvtph2_q(&exp, src1,  NET_MASK, 0);
+  res.s64[0] = _mm_cvtt_roundsh_i64(src1.xmmh[0], _ROUND_NINT);
+  check_results(&res, &exp, N_ELEMS, "_mm_cvtt_roundsh_i64");
+
+  if (n_errs != 0) {
+      abort ();
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c
new file mode 100644 (file)
index 0000000..5956457
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128h x1;
+volatile unsigned int res1;
+
+void extern
+avx512f_test (void)
+{
+  res1 = _mm_cvttsh_u32 (x1);
+  res1 = _mm_cvtt_roundsh_u32 (x1, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c
new file mode 100644 (file)
index 0000000..214e3e1
--- /dev/null
@@ -0,0 +1,54 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+#define N_ELEMS 2
+
+void NOINLINE
+emulate_cvtph2_d(V512 * dest, V512 op1,
+                __mmask32 k, int zero_mask)
+{
+  V512 v1, v2, v3, v4, v5, v6, v7, v8;
+  int i;
+  __mmask16 m1, m2;
+
+  m1 = k & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+
+  for (i = 0; i < 16; i++) {
+    if (((1 << i) & m1) == 0) {
+      if (zero_mask) {
+       v5.u32[i] = 0;
+      }
+      else {
+       v5.u32[i] = dest->u32[i];
+      }
+    }
+    else {
+      v5.u32[i] = v1.f32[i];
+
+    }
+  }
+  *dest = v5;
+}
+
+void
+test_512 (void)
+{
+  V512 res;
+  V512 exp;
+  
+  init_src();
+  emulate_cvtph2_d(&exp, src1,  NET_MASK, 0);
+  res.u32[0] = _mm_cvtt_roundsh_i32(src1.xmmh[0], _ROUND_NINT);
+  check_results(&res, &exp, N_ELEMS, "_mm_cvtt_roundsh_u32");
+
+  if (n_errs != 0) {
+      abort ();
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c
new file mode 100644 (file)
index 0000000..23e8e70
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile  { target { ! ia32 } } } */
+/* { dg-options "-mavx512fp16 -O2 " } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128h x1;
+volatile unsigned long long res2;
+
+void extern
+avx512f_test (void)
+{
+  res2 = _mm_cvttsh_u64 (x1);
+  res2 = _mm_cvtt_roundsh_u64 (x1, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c
new file mode 100644 (file)
index 0000000..863fb6e
--- /dev/null
@@ -0,0 +1,53 @@
+/* { dg-do run  { target { { ! ia32 } && avx512fp16 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+#define N_ELEMS 4
+
+void NOINLINE
+emulate_cvtph2_q(V512 * dest, V512 op1,
+                __mmask32 k, int zero_mask)
+{
+  V512 v1, v2, v3, v4, v5, v6, v7, v8;
+  int i;
+  __mmask16 m1, m2;
+
+  m1 = k & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+
+  for (i = 0; i < 8; i++) {
+    if (((1 << i) & m1) == 0) {
+      if (zero_mask) {
+       v5.u64[i] = 0;
+      }
+      else {
+       v5.u64[i] = dest->u64[i];
+      }
+    }
+    else {
+      v5.u64[i] = v1.f32[i];
+    }
+  }
+  *dest = v5;
+}
+
+void
+test_512 (void)
+{
+  V512 res;
+  V512 exp;
+  
+  init_src();
+  emulate_cvtph2_q(&exp, src1,  NET_MASK, 0);
+  res.u64[0] = _mm_cvtt_roundsh_i64(src1.xmmh[0], _ROUND_NINT);
+  check_results(&res, &exp, 4, "_mm_cvtt_roundsh_u64");
+
+  if (n_errs != 0) {
+      abort ();
+  }
+}
+
index c58e8acd57f7d49e86179fd1f91788ee889a8a18..d3bb6e1daa140f99376ad4291fda71241460ae2c 100644 (file)
 #define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8)
+#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8)
 #define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
 #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
 #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
index 98e38fb025a949e25506b0f369736a0d76223edb..403f3af60678a8c0d63976a35766dcd96bdae957 100644 (file)
@@ -698,9 +698,13 @@ test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
 test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
 test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
 test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_i32, int, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_u32, unsigned, __m128h, 8)
 #ifdef __x86_64__
 test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
 test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_i64, long long, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_u64, unsigned long long, __m128h, 8)
 test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
 test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
 #endif
index 3ad10908d49764608e762317b74cb165c06f6a59..b980ac3cddd0caa5b3aa361d9762a0304631eb28 100644 (file)
@@ -803,9 +803,13 @@ test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
 test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
 test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
 test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_i32, int, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_u32, unsigned, __m128h, 8)
 #ifdef __x86_64__
 test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
 test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_i64, long long, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_u64, unsigned long long, __m128h, 8)
 test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
 test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
 #endif
index 7dcaf216043c0c8c7d3af23d6d7113e13d2cd4f2..16430344ed23d590e8cbd24bb15e28caf577d96c 100644 (file)
 #define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8)
+#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8)
 #define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
 #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
 #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)