From: H.J. Lu Date: Wed, 3 Apr 2019 04:44:55 +0000 (-0700) Subject: AVX512FP16: Add scalar/vector bitwise operations, including X-Git-Tag: basepoints/gcc-13~4664 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=75a97b59e1eb4cef28fd87c9c45b7c15620a84b8;p=thirdparty%2Fgcc.git AVX512FP16: Add scalar/vector bitwise operations, including 1. FP16 vector xor/ior/and/andnot/abs/neg 2. FP16 scalar abs/neg/copysign/xorsign gcc/ChangeLog: * config/i386/i386-expand.c (ix86_expand_fp_absneg_operator): Handle HFmode. (ix86_expand_copysign): Ditto. (ix86_expand_xorsign): Ditto. * config/i386/i386.c (ix86_build_const_vector): Handle HF vector modes. (ix86_build_signbit_mask): Ditto. (ix86_can_change_mode_class): Ditto. * config/i386/i386.md (SSEMODEF): Add HFmode. (ssevecmodef): Ditto. (<code>hf2): New define_expand. (*<code>hf2_1): New define_insn_and_split. (copysign): Extend to support HFmode under AVX512FP16. (xorsign): Ditto. * config/i386/sse.md (VFB): New mode iterator. (VFB_128_256): Ditto. (VFB_512): Ditto. (sseintvecmode2): Support HF vector mode. (<code><mode>2): Use new mode iterator. (*<code><mode>2): Ditto. (copysign<mode>3): Ditto. (xorsign<mode>3): Ditto. (<code><mode>3<mask_name>): Ditto. (<code><mode>3<mask_name>): Ditto. (<sse>_andnot<mode>3<mask_name>): Adjust for HF vector mode. (<sse>_andnot<mode>3<mask_name>): Ditto. (*<code><mode>3<mask_name>): Ditto. (*<code><mode>3<mask_name>): Ditto. 
--- diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index bfafd1517c86..3ec032b99940 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -1981,8 +1981,12 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, machine_mode vmode = mode; rtvec par; - if (vector_mode || mode == TFmode) - use_sse = true; + if (vector_mode || mode == TFmode || mode == HFmode) + { + use_sse = true; + if (mode == HFmode) + vmode = V8HFmode; + } else if (TARGET_SSE_MATH) { use_sse = SSE_FLOAT_MODE_P (mode); @@ -2123,7 +2127,9 @@ ix86_expand_copysign (rtx operands[]) mode = GET_MODE (operands[0]); - if (mode == SFmode) + if (mode == HFmode) + vmode = V8HFmode; + else if (mode == SFmode) vmode = V4SFmode; else if (mode == DFmode) vmode = V2DFmode; @@ -2182,7 +2188,9 @@ ix86_expand_xorsign (rtx operands[]) mode = GET_MODE (dest); - if (mode == SFmode) + if (mode == HFmode) + vmode = V8HFmode; + else if (mode == SFmode) vmode = V4SFmode; else if (mode == DFmode) vmode = V2DFmode; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1f6d6ce567c0..ba89e111d281 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15561,6 +15561,9 @@ ix86_build_const_vector (machine_mode mode, bool vect, rtx value) case E_V2DImode: gcc_assert (vect); /* FALLTHRU */ + case E_V8HFmode: + case E_V16HFmode: + case E_V32HFmode: case E_V16SFmode: case E_V8SFmode: case E_V4SFmode: @@ -15599,6 +15602,13 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) switch (mode) { + case E_V8HFmode: + case E_V16HFmode: + case E_V32HFmode: + vec_mode = mode; + imode = HImode; + break; + case E_V16SImode: case E_V16SFmode: case E_V8SImode: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c82a9dc1f670..188f431510a1 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1237,8 +1237,8 @@ (define_mode_iterator X87MODEFH [HF SF DF XF]) ;; All SSE floating point modes 
-(define_mode_iterator SSEMODEF [SF DF TF]) -(define_mode_attr ssevecmodef [(SF "V4SF") (DF "V2DF") (TF "TF")]) +(define_mode_iterator SSEMODEF [HF SF DF TF]) +(define_mode_attr ssevecmodef [(HF "V8HF") (SF "V4SF") (DF "V2DF") (TF "TF")]) ;; SSE instruction suffix for various modes (define_mode_attr ssemodesuffix @@ -10732,6 +10732,12 @@ } [(set_attr "isa" "noavx,noavx,avx,avx")]) +(define_expand "hf2" + [(set (match_operand:HF 0 "register_operand") + (absneg:HF (match_operand:HF 1 "register_operand")))] + "TARGET_AVX512FP16" + "ix86_expand_fp_absneg_operator (, HFmode, operands); DONE;") + (define_expand "2" [(set (match_operand:X87MODEF 0 "register_operand") (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))] @@ -10762,6 +10768,22 @@ [(const_int 0)] "ix86_split_fp_absneg_operator (, mode, operands); DONE;") +(define_insn_and_split "*hf2_1" + [(set (match_operand:HF 0 "register_operand" "=Yv") + (absneg:HF + (match_operand:HF 1 "register_operand" "Yv"))) + (use (match_operand:V8HF 2 "vector_operand" "Yvm")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512FP16" + "#" + "&& reload_completed" + [(set (match_dup 0) + (:V8HF (match_dup 1) (match_dup 2)))] +{ + operands[0] = lowpart_subreg (V8HFmode, operands[0], HFmode); + operands[1] = lowpart_subreg (V8HFmode, operands[1], HFmode); +}) + (define_insn "*2_1" [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r") (absneg:MODEF @@ -10862,14 +10884,16 @@ (match_operand:SSEMODEF 1 "nonmemory_operand") (match_operand:SSEMODEF 2 "register_operand")] "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - || (TARGET_SSE && (mode == TFmode))" + || (TARGET_SSE && (mode == TFmode)) + || (TARGET_AVX512FP16 && (mode ==HFmode))" "ix86_expand_copysign (operands); DONE;") (define_expand "xorsign3" - [(match_operand:MODEF 0 "register_operand") - (match_operand:MODEF 1 "register_operand") - (match_operand:MODEF 2 "register_operand")] - "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + [(match_operand:MODEFH 0 
"register_operand") + (match_operand:MODEFH 1 "register_operand") + (match_operand:MODEFH 2 "register_operand")] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || mode == HFmode" { if (rtx_equal_p (operands[1], operands[2])) emit_insn (gen_abs2 (operands[0], operands[1])); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index baf464214b1d..3b5c05be0344 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -319,11 +319,26 @@ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) +;; 128-, 256- and 512-bit float vector modes for bitwise operations +(define_mode_iterator VFB + [(V32HF "TARGET_AVX512FP16") + (V16HF "TARGET_AVX512FP16") + (V8HF "TARGET_AVX512FP16") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + ;; 128- and 256-bit float vector modes (define_mode_iterator VF_128_256 [(V8SF "TARGET_AVX") V4SF (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) +;; 128- and 256-bit float vector modes for bitwise operations +(define_mode_iterator VFB_128_256 + [(V16HF "TARGET_AVX512FP16") + (V8HF "TARGET_AVX512FP16") + (V8SF "TARGET_AVX") V4SF + (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + ;; All SFmode vector float modes (define_mode_iterator VF1 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF]) @@ -376,6 +391,10 @@ (define_mode_iterator VF_512 [V16SF V8DF]) +;; All 512bit vector float modes for bitwise operations +(define_mode_iterator VFB_512 + [(V32HF "TARGET_AVX512FP16") V16SF V8DF]) + (define_mode_iterator VI48_AVX512VL [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) @@ -954,7 +973,8 @@ (define_mode_attr sseintvecmode2 [(V8DF "XI") (V4DF "OI") (V2DF "TI") - (V8SF "OI") (V4SF "TI")]) + (V8SF "OI") (V4SF "TI") + (V16HF "OI") (V8HF "TI")]) (define_mode_attr sseintvecmodelower [(V16SF "v16si") (V8DF "v8di") @@ -2030,22 +2050,22 @@ 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_expand "2" - [(set (match_operand:VF 0 "register_operand") - (absneg:VF - (match_operand:VF 1 "register_operand")))] + [(set (match_operand:VFB 0 "register_operand") + (absneg:VFB + (match_operand:VFB 1 "register_operand")))] "TARGET_SSE" "ix86_expand_fp_absneg_operator (, mode, operands); DONE;") (define_insn_and_split "*2" - [(set (match_operand:VF 0 "register_operand" "=x,x,v,v") - (absneg:VF - (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))) - (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))] + [(set (match_operand:VFB 0 "register_operand" "=x,x,v,v") + (absneg:VFB + (match_operand:VFB 1 "vector_operand" "0,xBm,v,m"))) + (use (match_operand:VFB 2 "vector_operand" "xBm,0,vm,v"))] "TARGET_SSE" "#" "&& reload_completed" [(set (match_dup 0) - (:VF (match_dup 1) (match_dup 2)))] + (:VFB (match_dup 1) (match_dup 2)))] { if (TARGET_AVX) { @@ -4073,11 +4093,11 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "_andnot3" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v") - (and:VF_128_256 - (not:VF_128_256 - (match_operand:VF_128_256 1 "register_operand" "0,x,v,v")) - (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] + [(set (match_operand:VFB_128_256 0 "register_operand" "=x,x,v,v") + (and:VFB_128_256 + (not:VFB_128_256 + (match_operand:VFB_128_256 1 "register_operand" "0,x,v,v")) + (match_operand:VFB_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] "TARGET_SSE && " { char buf[128]; @@ -4100,6 +4120,8 @@ switch (get_attr_mode (insn)) { + case MODE_V16HF: + case MODE_V8HF: case MODE_V8SF: case MODE_V4SF: suffix = "ps"; @@ -4138,11 +4160,11 @@ (const_string "")))]) (define_insn "_andnot3" - [(set (match_operand:VF_512 0 "register_operand" "=v") - (and:VF_512 - (not:VF_512 - (match_operand:VF_512 1 "register_operand" "v")) - (match_operand:VF_512 2 "nonimmediate_operand" "vm")))] + [(set (match_operand:VFB_512 0 
"register_operand" "=v") + (and:VFB_512 + (not:VFB_512 + (match_operand:VFB_512 1 "register_operand" "v")) + (match_operand:VFB_512 2 "nonimmediate_operand" "vm")))] "TARGET_AVX512F" { char buf[128]; @@ -4152,8 +4174,9 @@ suffix = ""; ops = ""; - /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */ - if (!TARGET_AVX512DQ) + /* Since there are no vandnp[sd] without AVX512DQ nor vandnph, + use vp[dq]. */ + if (!TARGET_AVX512DQ || mode == V32HFmode) { suffix = GET_MODE_INNER (mode) == DFmode ? "q" : "d"; ops = "p"; @@ -4173,26 +4196,26 @@ (const_string "XI")))]) (define_expand "3" - [(set (match_operand:VF_128_256 0 "register_operand") - (any_logic:VF_128_256 - (match_operand:VF_128_256 1 "vector_operand") - (match_operand:VF_128_256 2 "vector_operand")))] + [(set (match_operand:VFB_128_256 0 "register_operand") + (any_logic:VFB_128_256 + (match_operand:VFB_128_256 1 "vector_operand") + (match_operand:VFB_128_256 2 "vector_operand")))] "TARGET_SSE && " "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_expand "3" - [(set (match_operand:VF_512 0 "register_operand") - (any_logic:VF_512 - (match_operand:VF_512 1 "nonimmediate_operand") - (match_operand:VF_512 2 "nonimmediate_operand")))] + [(set (match_operand:VFB_512 0 "register_operand") + (any_logic:VFB_512 + (match_operand:VFB_512 1 "nonimmediate_operand") + (match_operand:VFB_512 2 "nonimmediate_operand")))] "TARGET_AVX512F" "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*3" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v") - (any_logic:VF_128_256 - (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v") - (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] + [(set (match_operand:VFB_128_256 0 "register_operand" "=x,x,v,v") + (any_logic:VFB_128_256 + (match_operand:VFB_128_256 1 "vector_operand" "%0,x,v,v") + (match_operand:VFB_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] "TARGET_SSE && && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { @@ 
-4216,6 +4239,8 @@ switch (get_attr_mode (insn)) { + case MODE_V16HF: + case MODE_V8HF: case MODE_V8SF: case MODE_V4SF: suffix = "ps"; @@ -4254,10 +4279,10 @@ (const_string "")))]) (define_insn "*3" - [(set (match_operand:VF_512 0 "register_operand" "=v") - (any_logic:VF_512 - (match_operand:VF_512 1 "nonimmediate_operand" "%v") - (match_operand:VF_512 2 "nonimmediate_operand" "vm")))] + [(set (match_operand:VFB_512 0 "register_operand" "=v") + (any_logic:VFB_512 + (match_operand:VFB_512 1 "nonimmediate_operand" "%v") + (match_operand:VFB_512 2 "nonimmediate_operand" "vm")))] "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { char buf[128]; @@ -4267,8 +4292,9 @@ suffix = ""; ops = ""; - /* There is no vp[sd] in avx512f. Use vp[dq]. */ - if (!TARGET_AVX512DQ) + /* Since there are no vp[sd] without AVX512DQ nor vph, + use vp[dq]. */ + if (!TARGET_AVX512DQ || mode == V32HFmode) { suffix = GET_MODE_INNER (mode) == DFmode ? "q" : "d"; ops = "p"; @@ -4289,14 +4315,14 @@ (define_expand "copysign3" [(set (match_dup 4) - (and:VF - (not:VF (match_dup 3)) - (match_operand:VF 1 "vector_operand"))) + (and:VFB + (not:VFB (match_dup 3)) + (match_operand:VFB 1 "vector_operand"))) (set (match_dup 5) - (and:VF (match_dup 3) - (match_operand:VF 2 "vector_operand"))) - (set (match_operand:VF 0 "register_operand") - (ior:VF (match_dup 4) (match_dup 5)))] + (and:VFB (match_dup 3) + (match_operand:VFB 2 "vector_operand"))) + (set (match_operand:VFB 0 "register_operand") + (ior:VFB (match_dup 4) (match_dup 5)))] "TARGET_SSE" { operands[3] = ix86_build_signbit_mask (mode, 1, 0); @@ -4307,11 +4333,11 @@ (define_expand "xorsign3" [(set (match_dup 4) - (and:VF (match_dup 3) - (match_operand:VF 2 "vector_operand"))) - (set (match_operand:VF 0 "register_operand") - (xor:VF (match_dup 4) - (match_operand:VF 1 "vector_operand")))] + (and:VFB (match_dup 3) + (match_operand:VFB 2 "vector_operand"))) + (set (match_operand:VFB 0 "register_operand") + (xor:VFB (match_dup 4) + 
(match_operand:VFB 1 "vector_operand")))] "TARGET_SSE" { operands[3] = ix86_build_signbit_mask (mode, 1, 0);