From: Hongyu Wang Date: Thu, 1 Jul 2021 05:17:32 +0000 (+0800) Subject: AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes X-Git-Tag: basepoints/gcc-13~4478 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=60698a19c77aef8c96d14238975e12fad653e7fb;p=thirdparty%2Fgcc.git AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes NB: 64bit/32bit vectorize for HFmode is not supported for now, will adjust this patch when V2HF/V4HF operations supported. gcc/ChangeLog: * config/i386/i386.md (fix_trunchf2): New expander. (fixuns_trunchfhi2): Likewise. (*fixuns_trunchfsi2zext): New define_insn. * config/i386/sse.md (ssePHmodelower): New mode_attr. (fix_trunc2): New expander for same element vector fix_truncate. (fix_trunc2): Likewise for V4HF to V4SI/V4DI fix_truncate. (fix_truncv2hfv2di2): Likeise for V2HF to V2DI fix_truncate. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-trunchf.c: New test. * gcc.target/i386/avx512fp16-truncvnhf.c: Ditto. --- diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a087e557d7fa..c6279e620c98 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4810,6 +4810,16 @@ } }) +(define_insn "fix_trunchf2" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (any_fix:SWI48 + (match_operand:HF 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512FP16" + "vcvttsh2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + ;; Signed conversion to SImode. (define_expand "fix_truncxfsi2" @@ -4917,6 +4927,17 @@ (set_attr "prefix" "evex") (set_attr "mode" "SI")]) +(define_insn "*fixuns_trunchfsi2zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unsigned_fix:SI + (match_operand:HF 1 "nonimmediate_operand" "vm"))))] + "TARGET_64BIT && TARGET_AVX512FP16" + "vcvttsh2usi\t{%1, %k0|%k0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "SI")]) + (define_insn "*fixuns_truncsi2_avx512f_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -4949,6 +4970,14 @@ ;; Without these patterns, we'll try the unsigned SI conversion which ;; is complex for SSE, rather than the signed SI conversion, which isn't. +(define_expand "fixuns_trunchfhi2" + [(set (match_dup 2) + (fix:SI (match_operand:HF 1 "nonimmediate_operand"))) + (set (match_operand:HI 0 "nonimmediate_operand") + (subreg:HI (match_dup 2) 0))] + "TARGET_AVX512FP16" + "operands[2] = gen_reg_rtx (SImode);") + (define_expand "fixuns_trunchi2" [(set (match_dup 2) (fix:SI (match_operand:MODEF 1 "nonimmediate_operand"))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1ca95984afc6..f8a5f197f3ca 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1034,6 +1034,13 @@ (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF") (V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")]) +;; Mapping of vector modes to vector hf modes of same element. +(define_mode_attr ssePHmodelower + [(V32HI "v32hf") (V16HI "v16hf") (V8HI "v8hf") + (V16SI "v16hf") (V8SI "v8hf") (V4SI "v4hf") + (V8DI "v8hf") (V4DI "v4hf") (V2DI "v2hf") + (V8DF "v8hf") (V16SF "v16hf") (V8SF "v8hf")]) + ;; Mapping of vector modes to packed single mode of the same size (define_mode_attr ssePSmode [(V16SI "V16SF") (V8DF "V16SF") @@ -6175,6 +6182,12 @@ (set_attr "prefix" "evex") (set_attr "mode" "HF")]) +(define_expand "fix_trunc2" + [(set (match_operand:VI2H_AVX512VL 0 "register_operand") + (any_fix:VI2H_AVX512VL + (match_operand: 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + (define_insn "avx512fp16_fix_trunc2" [(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v") (any_fix:VI2H_AVX512VL @@ -6185,6 +6198,21 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_expand "fix_truncv4hf2" + [(set (match_operand:VI4_128_8_256 0 "register_operand") + (any_fix:VI4_128_8_256 + (match_operand:V4HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" +{ + if (!MEM_P (operands[1])) + { + operands[1] = lowpart_subreg (V8HFmode, operands[1], V4HFmode); + emit_insn (gen_avx512fp16_fix_trunc2 (operands[0], + operands[1])); + DONE; + } +}) + (define_insn "avx512fp16_fix_trunc2" [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v") (any_fix:VI4_128_8_256 @@ -6207,6 +6235,21 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_expand "fix_truncv2hfv2di2" + [(set (match_operand:V2DI 0 "register_operand") + (any_fix:V2DI + (match_operand:V2HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" +{ + if (!MEM_P (operands[1])) + { + operands[1] = lowpart_subreg (V8HFmode, operands[1], V2HFmode); + emit_insn (gen_avx512fp16_fix_truncv2di2 (operands[0], + operands[1])); + DONE; + } +}) + (define_insn "avx512fp16_fix_truncv2di2" [(set (match_operand:V2DI 0 "register_operand" "=v") (any_fix:V2DI diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c new file mode 100644 index 000000000000..2c025b7803c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c @@ -0,0 +1,59 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 3 } } */ +/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "xorl\[ \\t\]+%edx, %edx" { target ia32 } } } */ + +#include + +short +__attribute__ ((noinline, noclone)) +trunc_f16_to_si16 (_Float16 f) +{ + return f; +} + +unsigned short +__attribute__ ((noinline, noclone)) +trunc_f16_to_su16 (_Float16 f) +{ + return f; +} + +int +__attribute__ ((noinline, noclone)) +trunc_f16_to_si32 (_Float16 f) +{ + return f; +} + +unsigned int +__attribute__ ((noinline, noclone)) +trunc_f16_to_su32 (_Float16 f) +{ + return f; +} + +long long +__attribute__ ((noinline, noclone)) +trunc_f16_to_si64 (_Float16 f) +{ + return f; +} + +unsigned long long +__attribute__ ((noinline, noclone)) +trunc_f16_to_su64 (_Float16 f) +{ + return f; +} + +unsigned long long +__attribute__ ((noinline, noclone)) +trunc_f16_to_su64_zext (_Float16 f) +{ + return (unsigned int) f; +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c new file mode 100644 index 000000000000..ee55cd12300b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-slp-vectorize -mprefer-vector-width=512" } */ + +extern long long di[8]; +extern unsigned long long udi[8]; +extern int si[16]; +extern unsigned int usi[16]; +extern short hi[32]; +extern unsigned short uhi[32]; +extern _Float16 hf[32]; + +#define DO_PRAGMA(X) _Pragma(#X) + +#define FIX_TRUNCHFVV(size, mode) \ + void __attribute__ ((noinline, noclone)) \ +fix_trunc##size##hf##v##size##mode () \ +{\ + int i; \ + DO_PRAGMA (GCC unroll size) \ + for (i = 0; i < size; i++) \ + mode[i] = hf[i]; \ +} + +FIX_TRUNCHFVV(32, hi) +FIX_TRUNCHFVV(16, hi) +FIX_TRUNCHFVV(8, hi) +FIX_TRUNCHFVV(16, si) +FIX_TRUNCHFVV(8, si) +FIX_TRUNCHFVV(4, si) +FIX_TRUNCHFVV(8, di) +FIX_TRUNCHFVV(4, di) +FIX_TRUNCHFVV(2, di) + +FIX_TRUNCHFVV(32, uhi) +FIX_TRUNCHFVV(16, uhi) +FIX_TRUNCHFVV(8, uhi) +FIX_TRUNCHFVV(16, usi) +FIX_TRUNCHFVV(8, usi) +FIX_TRUNCHFVV(4, usi) +FIX_TRUNCHFVV(8, udi) +FIX_TRUNCHFVV(4, udi) +FIX_TRUNCHFVV(2, udi) + +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */