]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Guard truncate from vector float to vector __bf16 with !flag_rounding_math && HONOR_N...
author: liuhongt <hongtao.liu@intel.com>
Thu, 7 Nov 2024 02:15:42 +0000 (18:15 -0800)
committer: liuhongt <hongtao.liu@intel.com>
Mon, 11 Nov 2024 02:20:23 +0000 (18:20 -0800)
The hardware instruction doesn't raise exceptions, quietly turns sNaN
into qNaN, and always rounds to nearest (even). Output denormals are
always flushed to zero and input denormals are always treated as zero.
MXCSR is neither consulted nor updated.
Without native instructions, flag_unsafe_math_optimizations is needed
for the permutation instructions.
Similarly, guard the extend from vector __bf16 to vector float with
!HONOR_NANS (BFmode).

gcc/ChangeLog:

* config/i386/i386.md (truncsfbf2): Add !flag_rounding_math
to the condition, require flag_unsafe_math_optimizations when
native instruction is not available.
* config/i386/mmx.md (truncv2sfv2bf2): Ditto.
(extendv2bfv2sf2): Add !HONOR_NANS (BFmode) to the condition.
* config/i386/sse.md (truncv4sfv4bf2): Add
!flag_rounding_math to the condition, require
flag_unsafe_math_optimizations when native instruction is not
available.
(truncv8sfv8bf2): Ditto.
(truncv16sfv16bf2): Ditto.
(extendv4bfv4sf2): Add !HONOR_NANS (BFmode) to the condition.
(extendv8bfv8sf2): Ditto.
(extendv16bfv16sf2): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512bf16-truncsfbf.c: Add -ffast-math.
* gcc.target/i386/avx512bw-extendbf2sf.c: Ditto.
* gcc.target/i386/avx512bw-truncsfbf.c: Ditto.
* gcc.target/i386/sse2-extendbf2sf.c: Ditto.
* gcc.target/i386/ssse3-truncsfbf.c: Ditto.

gcc/config/i386/i386.md
gcc/config/i386/mmx.md
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx512bf16-truncsfbf.c
gcc/testsuite/gcc.target/i386/avx512bw-extendbf2sf.c
gcc/testsuite/gcc.target/i386/avx512bw-truncsfbf.c
gcc/testsuite/gcc.target/i386/sse2-extendbf2sf.c
gcc/testsuite/gcc.target/i386/ssse3-truncsfbf.c

index 34bc04622b18316e62e043baccb304a56b97a398..f4aae80b7a9575abf2ffa6a9497b35bee77c420a 100644 (file)
    (set_attr "prefix" "evex")
    (set_attr "mode" "HF")])
 
+/* vcvtneps2bf16 doesn't honor sNaN: it quietly turns sNaN into qNaN,
+   and it always rounds to nearest even.
+   flag_unsafe_math_optimizations is needed for psrld.
+   If we don't expect qNaNs nor sNaNs and can assume rounding
+   to nearest, we can expand the conversion inline as
+   (fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16.  */
 (define_insn "truncsfbf2"
   [(set (match_operand:BF 0 "register_operand" "=x,x,v,Yv")
        (float_truncate:BF
          (match_operand:SF 1 "register_operand" "0,x,v,Yv")))]
-  "TARGET_SSE2 && flag_unsafe_math_optimizations && !HONOR_NANS (BFmode)"
+  "TARGET_SSE2 && !HONOR_NANS (BFmode) && !flag_rounding_math
+   && (flag_unsafe_math_optimizations
+       || TARGET_AVXNECONVERT
+       || (TARGET_AVX512BF16 && TARGET_AVX512VL))"
   "@
   psrld\t{$16, %0|%0, 16}
   %{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
index 021ac90ae2a00f3d0540686f27ee5d6cdc6292b0..61a4f4d21ea354fe0fb26238c40f158c4bfa701e 100644 (file)
   [(set (match_operand:V2BF 0 "register_operand")
        (float_truncate:V2BF
          (match_operand:V2SF 1 "nonimmediate_operand")))]
-  "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
+  "TARGET_SSSE3 && TARGET_MMX_WITH_SSE
+  && !HONOR_NANS (BFmode) && !flag_rounding_math
+  && (flag_unsafe_math_optimizations
+      || TARGET_AVXNECONVERT
+      || (TARGET_AVX512BF16 && TARGET_AVX512VL))"
 {
   rtx op1 = gen_reg_rtx (V4SFmode);
   rtx op0 = gen_reg_rtx (V4BFmode);
   [(set (match_operand:V2SF 0 "register_operand")
        (float_extend:V2SF
          (match_operand:V2BF 1 "nonimmediate_operand")))]
-  "TARGET_SSE2 && TARGET_MMX_WITH_SSE"
+  "TARGET_SSE2 && TARGET_MMX_WITH_SSE && !HONOR_NANS (BFmode)"
 {
   rtx op0 = gen_reg_rtx (V4SFmode);
   rtx op1 = gen_reg_rtx (V4BFmode);
index 5eeb3ab221a1245546be7ad1f60bb5e0e39e4320..efe32e5149fc0ff35afb6a1b39287668f3ec5782 100644 (file)
   [(set (match_operand:V4BF 0 "register_operand")
          (float_truncate:V4BF
            (match_operand:V4SF 1 "nonimmediate_operand")))]
-  "TARGET_SSSE3"
+  "TARGET_SSSE3 && !HONOR_NANS (BFmode) && !flag_rounding_math
+   && (flag_unsafe_math_optimizations
+       || TARGET_AVXNECONVERT
+       || (TARGET_AVX512BF16 && TARGET_AVX512VL))"
 {
   if (!TARGET_AVXNECONVERT
       && !(TARGET_AVX512BF16 && TARGET_AVX512VL))
   [(set (match_operand:V8BF 0 "register_operand")
        (float_truncate:V8BF
          (match_operand:V8SF 1 "nonimmediate_operand")))]
-  "TARGET_AVX2"
+  "TARGET_AVX2 && !HONOR_NANS (BFmode) && !flag_rounding_math
+   && (flag_unsafe_math_optimizations
+       || TARGET_AVXNECONVERT
+       || (TARGET_AVX512BF16 && TARGET_AVX512VL))"
 {
   if (!TARGET_AVXNECONVERT
       && !(TARGET_AVX512BF16 && TARGET_AVX512VL))
   [(set (match_operand:V16BF 0 "register_operand")
        (float_truncate:V16BF
          (match_operand:V16SF 1 "nonimmediate_operand")))]
-  "TARGET_AVX512BW && TARGET_EVEX512"
+  "TARGET_AVX512BW && TARGET_EVEX512
+   && !HONOR_NANS (BFmode) && !flag_rounding_math
+   && (flag_unsafe_math_optimizations || TARGET_AVX512BF16)"
 {
   if (!TARGET_AVX512BF16)
     {
   [(set (match_operand:VF1_AVX512BW 0 "register_operand")
        (float_extend:VF1_AVX512BW
          (match_operand:<sf_cvt_bf16> 1 "nonimmediate_operand")))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && !HONOR_NANS (BFmode)"
 {
   ix86_expand_vector_bf2sf_with_vec_perm (operands[0], operands[1]);
   DONE;
index da31bdba21b0c965fcc3136b5255024d64bfde37..1b4b62f10601f2fd7862df6872675c37716599f3 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mavx512vl -mavx512bf16 -O2" } */
+/* { dg-options "-mavx512vl -mavx512bf16 -O2 -ffast-math" } */
 /* { dg-final { scan-assembler-times {(?n)vcvtneps2bf16} 6 } } */
 
 #include "avx512bw-truncsfbf.c"
index 5b59958151f7f995e5cf8acc7dcc788fd9e006b5..e7c65b7ee0146849cd6f45f776bb72d32edc8650 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-options "-mavx512bw -mavx512vl -O2 -ffast-math" } */
 /* { dg-final { scan-assembler-times {(?n)(?:vpermi2w|vpunpcklwd)} 6 } } */
 
 typedef float v4sf __attribute__((vector_size(16)));
index 071db21cfb3710eac4bfdfaeb012af88efe6c3b8..40802d865df6c6fdc6f1fe71e91df33c89cf21c3 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mavx512bw -mavx512vl -mno-avx512bf16 -mno-avxneconvert -O2" } */
+/* { dg-options "-mavx512bw -mavx512vl -mno-avx512bf16 -mno-avxneconvert -O2 -ffast-math" } */
 /* { dg-final { scan-assembler-times {(?n)(?:vpermw|vpshufb)} 6 } } */
 
 typedef float v4sf __attribute__((vector_size(16)));
index 0f007df68f6b4ddeba2659268efc41c3680060a0..d7f77acd6035be6ad50a433c9dabd32cb1d0543c 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-msse2 -O2" } */
+/* { dg-options "-msse2 -O2 -ffast-math" } */
 /* { dg-final { scan-assembler-times {(?n)(?:vpermi2w|punpcklwd)} 2 { target { ! ia32 } } } } */
 
 typedef float v2sf __attribute__((vector_size(8)));
index 70840c537f19840e7270f15b347e75fed2a62d35..af92f4d0befe3724d8ae8f8639c6e8331332bed1 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mssse3 -mno-avx512bf16 -mno-avxneconvert -O2" } */
+/* { dg-options "-mssse3 -mno-avx512bf16 -mno-avxneconvert -O2 -ffast-math" } */
 /* { dg-final { scan-assembler-times {(?n)pshufb} 2 { target { ! ia32 } } } } */
 
 typedef float v2sf __attribute__((vector_size(8)));