From: liuhongt Date: Wed, 19 Jun 2024 05:12:00 +0000 (+0800) Subject: Add more splitter for mskmov with avx512 comparison. X-Git-Tag: basepoints/gcc-16~7814 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3cb204046c0db899750aee9480af4f1953a40ac3;p=thirdparty%2Fgcc.git Add more splitter for mskmov with avx512 comparison. gcc/ChangeLog: PR target/115517 * config/i386/sse.md (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt_avx512): New define_insn_and_split. (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt_avx512): Ditto. (*<sse2_avx2>_pmovmskb_lt_avx512): Ditto. (*<sse2_avx2>_pmovmskb_zext_lt_avx512): Ditto. (*sse2_pmovmskb_ext_lt_avx512): Ditto. (*pmovsk_kmask_v16qi_avx512): Ditto. (*pmovsk_mask_v32qi_avx512): Ditto. (*pmovsk_mask_cmp_<mode>_avx512): Ditto. (*pmovsk_ptest_<mode>_avx512): Ditto. --- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 694b4b8f07c..3ffa1881c83 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10071,24 +10071,6 @@ [(set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not" - [(set (match_operand:VI12_AVX512VL 0 "register_operand") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 2 "const0_operand") - (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand") - (match_operand:<avx512fmaskmode> 1 "register_operand")))] - "TARGET_AVX512BW && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 4) - (not:<avx512fmaskmode> (match_dup 1))) - (set (match_dup 0) - (vec_merge:VI12_AVX512VL - (match_dup 3) - (match_dup 2) - (match_dup 4)))] - "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);") - (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>" [(set (match_operand:VI48_AVX512VL 0 "register_operand") (vec_merge:VI48_AVX512VL @@ -10128,10 +10110,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not" - [(set (match_operand:VI48_AVX512VL 0 "register_operand") - (vec_merge:VI48_AVX512VL - (match_operand:VI48_AVX512VL 2 "const0_operand") - (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand") + [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand") + (vec_merge:VI1248_AVX512VLBW +
(match_operand:VI1248_AVX512VLBW 2 "const0_operand") + (match_operand:VI1248_AVX512VLBW 3 "vector_all_ones_operand") (match_operand:<avx512fmaskmode> 1 "register_operand")))] "TARGET_AVX512F && ix86_pre_reload_split ()" "#" @@ -10139,7 +10121,7 @@ [(set (match_dup 4) (not:<avx512fmaskmode> (match_dup 1))) (set (match_dup 0) - (vec_merge:VI48_AVX512VL + (vec_merge:VI1248_AVX512VLBW (match_dup 3) (match_dup 2) (match_dup 4)))] @@ -21816,6 +21798,30 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt_avx512" + [(set (match_operand:SI 0 "register_operand" "=r,jr") + (unspec:SI + [(subreg:VF_128_256 + (vec_merge:<sseintvecmode> + (match_operand:<sseintvecmode> 3 "vector_all_ones_operand") + (match_operand:<sseintvecmode> 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:<sseintvecmode> 1 "register_operand" "x,x") + (match_operand:<sseintvecmode> 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP)) 0)] + UNSPEC_MOVMSK))] + "TARGET_SSE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))] + "operands[1] = gen_lowpart (<MODE>mode, operands[1]);" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt" [(set (match_operand:DI 0 "register_operand" "=r,jr") (any_extend:DI @@ -21835,6 +21841,31 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt_avx512" + [(set (match_operand:DI 0 "register_operand" "=r,jr") + (any_extend:DI + (unspec:SI + [(subreg:VF_128_256 + (vec_merge:<sseintvecmode> + (match_operand:<sseintvecmode> 3 "vector_all_ones_operand") + (match_operand:<sseintvecmode> 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:<sseintvecmode> 1 "register_operand" "x,x") + (match_operand:<sseintvecmode> 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP)) 0)] + UNSPEC_MOVMSK)))] + "TARGET_64BIT && TARGET_SSE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] + "operands[1] = gen_lowpart (<MODE>mode, operands[1]);" + [(set_attr "isa" "noavx,avx") + (set_attr "type"
"ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift" [(set (match_operand:SI 0 "register_operand" "=r,jr") (unspec:SI @@ -22024,6 +22055,34 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt_avx512" + [(set (match_operand:SI 0 "register_operand" "=r,jr") + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 3 "vector_all_ones_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 1 "register_operand" "x,x") + (match_operand:VI1_AVX2 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP))] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))] + "" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SI")]) + (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt" [(set (match_operand:DI 0 "register_operand" "=r,jr") (zero_extend:DI @@ -22047,6 +22106,35 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt_avx512" + [(set (match_operand:DI 0 "register_operand" "=r,jr") + (zero_extend:DI + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 3 "vector_all_ones_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 1 "register_operand" "x,x") + (match_operand:VI1_AVX2 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP))] + UNSPEC_MOVMSK)))] + "TARGET_64BIT && TARGET_SSE2" + "#" + "&& 1" + [(set (match_dup 0) + (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] + "" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr
"mode" "SI")]) + (define_insn_and_split "*sse2_pmovmskb_ext_lt" [(set (match_operand:DI 0 "register_operand" "=r,jr") (sign_extend:DI @@ -22070,6 +22158,63 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) +(define_insn_and_split "*sse2_pmovmskb_ext_lt_avx512" + [(set (match_operand:DI 0 "register_operand" "=r,jr") + (sign_extend:DI + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 3 "vector_all_ones_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 1 "register_operand" "x,x") + (match_operand:VI1_AVX2 2 "const0_operand") + (const_int 1)] + UNSPEC_PCMP))] + UNSPEC_MOVMSK)))] + "TARGET_64BIT && TARGET_SSE2" + "#" + "&& 1" + [(set (match_dup 0) + (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] + "" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set (attr "prefix_data16") + (if_then_else + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*pmovsk_kmask_v16qi_avx512" + [(set (match_operand:SI 0 "register_operand") + (unspec:SI + [(vec_merge:V16QI + (match_operand:V16QI 2 "vector_all_ones_operand") + (match_operand:V16QI 3 "const0_operand") + (match_operand:HI 1 "register_operand"))] + UNSPEC_MOVMSK))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (zero_extend:SI (match_dup 1)))]) + +(define_insn_and_split "*pmovsk_mask_v32qi_avx512" + [(set (match_operand:SI 0 "register_operand") + (unspec:SI + [(vec_merge:V32QI + (match_operand:V32QI 2 "vector_all_ones_operand") + (match_operand:V32QI 3 "const0_operand") + (match_operand:SI 1 "register_operand"))] + UNSPEC_MOVMSK))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (match_dup 1))]) + ;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
(define_mode_attr vi1avx2const [(V32QI "0xffffffff") (V16QI "0xffff")]) @@ -22088,6 +22233,47 @@ (match_dup 0)] UNSPEC_PTEST))]) +(define_insn_and_split "*pmovsk_mask_cmp_<mode>_avx512" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 0 "vector_all_ones_operand") + (match_operand:VI1_AVX2 3 "const0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand"))] + UNSPEC_MOVMSK) + (match_operand 2 "const_int_operand")))] + "TARGET_AVX512VL && UINTVAL (operands[2]) <= <vi1avx2const>" + "#" + "&& 1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (match_dup 1) + (match_dup 2)))] + "operands[2] = gen_int_mode (UINTVAL (operands[2]), <avx512fmaskmode>mode);") + +(define_insn_and_split "*pmovsk_ptest_<mode>_avx512" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (unspec:SI + [(vec_merge:VI1_AVX2 + (match_operand:VI1_AVX2 3 "vector_all_ones_operand") + (match_operand:VI1_AVX2 4 "const0_operand") + (unspec:<avx512fmaskmode> + [(match_operand:VI1_AVX2 0 "vector_operand") + (match_operand:VI1_AVX2 1 "const0_operand") + (const_int 0)] + UNSPEC_PCMP))] + UNSPEC_MOVMSK) + (match_operand 2 "const_int_operand")))] + "TARGET_AVX512VL && (INTVAL (operands[2]) == (int) (<vi1avx2const>))" + "#" + "&& 1" + [(set (reg:CCZ FLAGS_REG) + (unspec:CCZ [(match_dup 0) + (match_dup 0)] + UNSPEC_PTEST))]) + (define_expand "sse2_maskmovdqu" [(set (match_operand:V16QI 0 "memory_operand") (unspec:V16QI [(match_operand:V16QI 1 "register_operand")