From: Hongyu Wang Date: Wed, 8 Sep 2021 01:42:29 +0000 (+0800) Subject: AVX512FP16: Support cond_op for HFmode X-Git-Tag: basepoints/gcc-13~4440 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ea0f450e98db5f18d3363679db4cd82961f44642;p=thirdparty%2Fgcc.git AVX512FP16: Support cond_op for HFmode gcc/ChangeLog: * config/i386/sse.md (cond_): Extend to support vector HFmodes. (cond_mul): Likewise. (cond_div): Likewise. (cond_): Likewise. (cond_fma): Likewise. (cond_fms): Likewise. (cond_fnma): Likewise. (cond_fnms): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c: New test. * gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c: Ditto. * gcc.target/i386/cond_op_fma__Float16-1.c: Ditto. * gcc.target/i386/cond_op_fma__Float16-2.c: Ditto. * gcc.target/i386/cond_op_maxmin__Float16-1.c: Ditto. * gcc.target/i386/cond_op_maxmin__Float16-2.c: Ditto. --- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 084fc7f46939..a446dedb2ec9 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2125,12 +2125,12 @@ [(set_attr "isa" "noavx,noavx,avx,avx")]) (define_expand "cond_" - [(set (match_operand:VF 0 "register_operand") - (vec_merge:VF - (plusminus:VF - (match_operand:VF 2 "vector_operand") - (match_operand:VF 3 "vector_operand")) - (match_operand:VF 4 "nonimm_or_0_operand") + [(set (match_operand:VFH 0 "register_operand") + (vec_merge:VFH + (plusminus:VFH + (match_operand:VFH 2 "vector_operand") + (match_operand:VFH 3 "vector_operand")) + (match_operand:VFH 4 "nonimm_or_0_operand") (match_operand: 1 "register_operand")))] " == 64 || TARGET_AVX512VL" { @@ -2214,12 +2214,12 @@ (set_attr "mode" "")]) (define_expand "cond_mul" - [(set (match_operand:VF 0 "register_operand") - (vec_merge:VF - (mult:VF - (match_operand:VF 2 "vector_operand") - (match_operand:VF 3 "vector_operand")) - (match_operand:VF 4 "nonimm_or_0_operand") + [(set (match_operand:VFH 0 "register_operand") + (vec_merge:VFH + (mult:VFH + (match_operand:VFH 2 "vector_operand") + (match_operand:VFH 3 "vector_operand")) + (match_operand:VFH 4 "nonimm_or_0_operand") (match_operand: 1 "register_operand")))] " == 64 || TARGET_AVX512VL" { @@ -2329,12 +2329,12 @@ }) (define_expand "cond_div" - [(set (match_operand:VF 0 "register_operand") - (vec_merge:VF - (div:VF - (match_operand:VF 2 "register_operand") - (match_operand:VF 3 "vector_operand")) - (match_operand:VF 4 "nonimm_or_0_operand") + [(set (match_operand:VFH 0 "register_operand") + (vec_merge:VFH + (div:VFH + (match_operand:VFH 2 "register_operand") + (match_operand:VFH 3 "vector_operand")) + (match_operand:VFH 4 "nonimm_or_0_operand") (match_operand: 1 "register_operand")))] " == 64 || TARGET_AVX512VL" { @@ -2667,12 +2667,12 @@ (set_attr "mode" "HF")]) (define_expand "cond_" - [(set (match_operand:VF 0 "register_operand") - (vec_merge:VF - (smaxmin:VF - (match_operand:VF 2 "vector_operand") - (match_operand:VF 3 "vector_operand")) - (match_operand:VF 4 "nonimm_or_0_operand") + [(set (match_operand:VFH 0 "register_operand") + (vec_merge:VFH + (smaxmin:VFH + (match_operand:VFH 2 "vector_operand") + (match_operand:VFH 3 "vector_operand")) + (match_operand:VFH 4 "nonimm_or_0_operand") (match_operand: 1 "register_operand")))] " == 64 || TARGET_AVX512VL" { @@ -4837,13 +4837,13 @@ (set_attr "mode" "")]) (define_expand "cond_fma" - [(set (match_operand:VF_AVX512VL 0 "register_operand") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (match_operand:VF_AVX512VL 2 "vector_operand") - (match_operand:VF_AVX512VL 3 "vector_operand") - (match_operand:VF_AVX512VL 4 "vector_operand")) - (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand") + [(set (match_operand:VFH_AVX512VL 0 "register_operand") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (match_operand:VFH_AVX512VL 2 "vector_operand") + (match_operand:VFH_AVX512VL 3 "vector_operand") + (match_operand:VFH_AVX512VL 4 "vector_operand")) + (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand") (match_operand: 1 "register_operand")))] "TARGET_AVX512F" { @@ -4937,14 +4937,14 @@ (set_attr "mode" "")]) (define_expand "cond_fms" - [(set (match_operand:VF_AVX512VL 0 "register_operand") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (match_operand:VF_AVX512VL 2 "vector_operand") - (match_operand:VF_AVX512VL 3 "vector_operand") - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 4 "vector_operand"))) - (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand") + [(set (match_operand:VFH_AVX512VL 0 "register_operand") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (match_operand:VFH_AVX512VL 2 "vector_operand") + (match_operand:VFH_AVX512VL 3 "vector_operand") + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 4 "vector_operand"))) + (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand") (match_operand: 1 "register_operand")))] "TARGET_AVX512F" { @@ -5040,14 +5040,14 @@ (set_attr "mode" "")]) (define_expand "cond_fnma" - [(set (match_operand:VF_AVX512VL 0 "register_operand") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 2 "vector_operand")) - (match_operand:VF_AVX512VL 3 "vector_operand") - (match_operand:VF_AVX512VL 4 "vector_operand")) - (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand") + [(set (match_operand:VFH_AVX512VL 0 "register_operand") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 2 "vector_operand")) + (match_operand:VFH_AVX512VL 3 "vector_operand") + (match_operand:VFH_AVX512VL 4 "vector_operand")) + (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand") (match_operand: 1 "register_operand")))] "TARGET_AVX512F" { @@ -5145,15 +5145,15 @@ (set_attr "mode" "")]) (define_expand "cond_fnms" - [(set (match_operand:VF_AVX512VL 0 "register_operand") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 2 "vector_operand")) - (match_operand:VF_AVX512VL 3 "vector_operand") - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 4 "vector_operand"))) - (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand") + [(set (match_operand:VFH_AVX512VL 0 "register_operand") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 2 "vector_operand")) + (match_operand:VFH_AVX512VL 3 "vector_operand") + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 4 "vector_operand"))) + (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand") (match_operand: 1 "register_operand")))] "TARGET_AVX512F" { diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c new file mode 100644 index 000000000000..b503b75d548b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=sapphirerapids -DTYPE=_Float16 -fdump-tree-vect" } */ +/* { dg-final { scan-tree-dump ".COND_ADD" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_SUB" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_MUL" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_RDIV" "vect" } } */ + +#include "cond_op_addsubmuldiv_double-1.c" + diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c new file mode 100644 index 000000000000..e8397bbc5b1e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv__Float16-2.c @@ -0,0 +1,7 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=256 -DTYPE=_Float16" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512fp16 } */ + +#define AVX512FP16 +#include "cond_op_addsubmuldiv_double-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-1.c b/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-1.c new file mode 100644 index 000000000000..9ea45d690e22 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=sapphirerapids -DTYPE=_Float16 -fdump-tree-optimized -D__BUILTIN_FMA=__builtin_fmaf16" } */ +/* { dg-final { scan-tree-dump-times ".COND_FMA" 3 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_FNMA" 3 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_FMS" 3 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_FNMS" 3 "optimized" } } */ +/* { dg-final { scan-assembler-times "vfmadd132ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include "cond_op_fma_double-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-2.c b/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-2.c new file mode 100644 index 000000000000..b384ab86b4dd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_fma__Float16-2.c @@ -0,0 +1,7 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=256 -DTYPE=_Float16 -D__BUILTIN_FMA=__builtin_fmaf16 -DNUM=100" } */ +/* { dg-require-effective-target avx512fp16 } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512FP16 +#include "cond_op_fma_double-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-1.c b/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-1.c new file mode 100644 index 000000000000..b09410248f0c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=sapphirerapids -DTYPE=_Float16 -fdump-tree-optimized -DFN_MAX=__builtin_fmaxf16 -DFN_MIN=__builtin_fminf16" } */ +/* { dg-final { scan-tree-dump ".COND_MAX" "optimized" } } */ +/* { dg-final { scan-tree-dump ".COND_MIN" "optimized" } } */ +/* { dg-final { scan-assembler-times "vmaxph" 1 } } */ +/* { dg-final { scan-assembler-times "vminph" 1 } } */ + +#include "cond_op_maxmin_double-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-2.c b/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-2.c new file mode 100644 index 000000000000..b07d04405ec7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_maxmin__Float16-2.c @@ -0,0 +1,7 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=256 -DTYPE=_Float16 -DFN_MAX=__builtin_fmaxf16 -DFN_MIN=__builtin_fminf16 -ffast-math" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512fp16 } */ + +#define AVX512FP16 +#include "cond_op_maxmin_double-2.c"