From: liuhongt Date: Wed, 15 Oct 2025 09:00:30 +0000 (-0700) Subject: Support reduc_sbool_and_scal_m for V{QI,SI,DI}mode. X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7821a827ab8701166d4723fd68636ece3e82206e;p=thirdparty%2Fgcc.git Support reduc_sbool_and_scal_m for V{QI,SI,DI}mode. gcc/ChangeLog: PR target/101639 * config/i386/sse.md (VI_AVX): New mode iterator. (VI_AVX2_CMP): Ditto. (ssebytemode): Add V16HI, V32QI, V16QI. (reduc_sbool_and_scal_): New expander. (reduc_sbool_ior_scal_): Ditto. (reduc_sbool_xor_scal_): Ditto. (*eq3_2_negate): New pre_reload splitter. (*ptest_ccz): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr101639_reduc_mask_vdi.c: New test. * gcc.target/i386/pr101639_reduc_mask_vqi.c: New test. * gcc.target/i386/pr101639_reduc_mask_vsi.c: New test. * gcc.target/i386/pr101639_reduc_mask_ior_vqi.c: New test. * gcc.target/i386/pr101639_reduc_mask_and_vqi.c: New test. --- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b1918c46286..5eba99225c4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -569,6 +569,18 @@ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) +(define_mode_iterator VI_AVX + [(V32QI "TARGET_AVX") V16QI + (V16HI "TARGET_AVX") V8HI + (V8SI "TARGET_AVX") V4SI + (V4DI "TARGET_AVX") V2DI]) + +(define_mode_iterator VI_AVX2_CMP + [(V32QI "TARGET_AVX2") V16QI + (V16HI "TARGET_AVX2") V8HI + (V8SI "TARGET_AVX2") V4SI + (V4DI "TARGET_AVX2") V2DI]) + (define_mode_iterator VI_AVX_AVX512F [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI @@ -896,7 +908,8 @@ (define_mode_attr ssebytemode [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI") (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI") - (V8HI "V16QI")]) + (V16HI "V32QI") (V8HI "V16QI") + (V32QI "V32QI") (V16QI "V16QI")]) (define_mode_attr sseintconvert [(V32HI "w") (V16HI "w") (V8HI "w") @@ -4095,6 +4108,88 @@ DONE; }) +(define_expand 
"reduc_sbool_and_scal_" + [(match_operand:QI 0 "register_operand") + (match_operand:VI_AVX 1 "register_operand")] + "TARGET_SSE4_1" +{ + rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); + rtx op2, tmp; + if (TARGET_AVX2 || != 32) + { + op2 = force_reg (mode, CONST0_RTX (mode)); + tmp = gen_reg_rtx (mode); + rtx op1 = gen_rtx_EQ (mode, operands[1], op2); + emit_insn (gen_vec_cmp (tmp, op1, operands[1], op2)); + } + else + { + op2 = force_reg (mode, CONSTM1_RTX (mode)); + tmp = gen_reg_rtx (mode); + rtx ops[3] = { tmp, operands[1], op2 }; + ix86_expand_vector_logical_operator (XOR, mode, ops); + } + + tmp = gen_rtx_UNSPEC (CCZmode, gen_rtvec(2, tmp, tmp), UNSPEC_PTEST); + emit_insn (gen_rtx_SET (flags, tmp)); + rtx ret = gen_rtx_fmt_ee (EQ, VOIDmode, flags, const0_rtx); + PUT_MODE (ret, QImode); + emit_insn (gen_rtx_SET (operands[0], ret)); + DONE; + +}) + +(define_expand "reduc_sbool_ior_scal_" + [(match_operand:QI 0 "register_operand") + (match_operand:VI_AVX 1 "register_operand")] + "TARGET_SSE4_1" +{ + rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); + rtx tmp = gen_rtx_UNSPEC (CCZmode, gen_rtvec(2, operands[1], operands[1]), UNSPEC_PTEST); + emit_insn (gen_rtx_SET (flags, tmp)); + rtx ret = gen_rtx_fmt_ee (NE, VOIDmode, flags, const0_rtx); + PUT_MODE (ret, QImode); + emit_insn (gen_rtx_SET (operands[0], ret)); + DONE; +}) + +(define_expand "reduc_sbool_xor_scal_" + [(match_operand:QI 0 "register_operand") + (match_operand:VI1_AVX2 1 "register_operand")] + "TARGET_SSE2 && TARGET_POPCNT" +{ + rtx popcnt1 = gen_reg_rtx (SImode); + emit_insn (gen__pmovmskb (popcnt1,operands[1])); + + emit_insn (gen_popcountsi2 (popcnt1, popcnt1)); + emit_insn (gen_andsi3 (popcnt1, popcnt1, GEN_INT (0x1))); + + emit_move_insn (operands[0], gen_lowpart (QImode, popcnt1)); + DONE; +}) + +(define_mode_attr ssefltvecmode + [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")]) + +(define_expand "reduc_sbool_xor_scal_" + [(match_operand:QI 0 "register_operand") + (match_operand:VI48_AVX 1 
"register_operand")] + "TARGET_SSE2 && TARGET_POPCNT" +{ + rtx popcnt1 = gen_reg_rtx (SImode); + rtx tmp = gen_rtx_UNSPEC (SImode, gen_rtvec(1, + gen_lowpart (mode, + operands[1])), + UNSPEC_MOVMSK); + emit_insn (gen_rtx_SET (popcnt1, tmp)); + + emit_insn (gen_popcountsi2 (popcnt1, popcnt1)); + emit_insn (gen_andsi3 (popcnt1, popcnt1, GEN_INT (0x1))); + + emit_move_insn (operands[0], gen_lowpart (QImode, popcnt1)); + DONE; +}) + (define_insn "reducep" [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") (unspec:VFH_AVX512VL @@ -18084,6 +18179,24 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) +(define_insn_and_split "*eq3_2_negate" + [(set (match_operand:VI_AVX2_CMP 0 "register_operand") + (eq:VI_AVX2_CMP + (eq:VI_AVX2_CMP + (eq: VI_AVX2_CMP + (match_operand:VI_AVX2_CMP 1 "nonimmediate_operand") + (match_operand:VI_AVX2_CMP 2 "general_operand")) + (match_operand:VI_AVX2_CMP 3 "const0_operand")) + (match_operand:VI_AVX2_CMP 4 "const0_operand")))] + "TARGET_SSE4_1 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (eq:VI_AVX2_CMP (match_dup 1) + (match_dup 5)))] + "operands[5] = force_reg (mode, operands[2]);") + + (define_insn_and_split "*avx2_pcmp3_1" [(set (match_operand:VI_128_256 0 "register_operand") (vec_merge:VI_128_256 @@ -23774,9 +23887,6 @@ (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "")]) -(define_mode_attr ssefltvecmode - [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")]) - (define_insn_and_split "*_blendv_ltint" [(set (match_operand: 0 "register_operand" "=Yr,*x,x") (unspec: @@ -25591,6 +25701,36 @@ (match_dup 0) (pc)))]) + +;; (unspec:ccz [(eq (eq op0 const0) const0)] unspec_ptest) +;; is equal to (unspec:ccz [op0 op0] unspec_ptest). 
+(define_insn_and_split "*ptest_ccz" + [(set (reg:CCZ FLAGS_REG) + (unspec:CCZ + [(eq:VI_AVX + (eq:VI_AVX + (match_operand:VI_AVX 0 "vector_operand") + (match_operand:VI_AVX 1 "const0_operand")) + (match_operand:VI_AVX 2 "const0_operand")) + (eq:VI_AVX + (eq:VI_AVX (match_dup 0) (match_dup 1)) + (match_dup 2))] + UNSPEC_PTEST))] + "TARGET_SSE4_1 + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (reg:CCZ FLAGS_REG) + (unspec:CCZ + [(match_dup 3) (match_dup 3)] + UNSPEC_PTEST))] +{ + if (MEM_P (operands[0])) + operands[3] = force_reg (mode, operands[0]); + else + operands[3] = operands[0]; +}) + (define_expand "nearbyint2" [(set (match_operand:VFH 0 "register_operand") (unspec:VFH diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_and_vqi.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_and_vqi.c new file mode 100644 index 00000000000..23fc67e8ae9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_and_vqi.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -O2" } */ +/* { dg-final { scan-assembler-times "vptest" 1 } } */ +/* { dg-final { scan-assembler-times "sete" 1 } } */ +/* { dg-final { scan-assembler-times "vpcmpeq" 1 } } */ + +bool f2(char * p, long n) +{ + bool r = true; + for(long i = 0; i < 32; ++i) + r &= (p[i] != 0); + return r; +} + diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_ior_vqi.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_ior_vqi.c new file mode 100644 index 00000000000..e1deb2fe21d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_ior_vqi.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -O2" } */ +/* { dg-final { scan-assembler-times "vptest" 1 } } */ +/* { dg-final { scan-assembler-times "setne" 1 } } */ +/* { dg-final { scan-assembler-not "vpcmpeq" } } */ + +bool f2(char * p, long n) +{ + bool r = false; + for(long i = 0; i < 32; ++i) + r |= (p[i] != 0); + return r; +} + diff --git 
a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vdi.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vdi.c new file mode 100644 index 00000000000..ee526973006 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vdi.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -O2" } */ +/* { dg-final { scan-assembler-times "vptest" 2 } } */ +/* { dg-final { scan-assembler-times "sete" 1 } } */ +/* { dg-final { scan-assembler-times "setne" 1 } } */ +/* { dg-final { scan-assembler-times "popcnt" 1 } } */ +/* { dg-final { scan-assembler-times "vmovmskpd" 1 } } */ + +bool f(long long *p, long n) +{ + bool r = true; + for(long i = 0; i < 4; ++i) + r &= (p[i] != 0); + return r; +} + +bool f2(long long *p, long n) +{ + bool r = false; + for(long i = 0; i < 4; ++i) + r |= (p[i] != 0); + return r; +} + +bool f3(long long *p, long n) +{ + bool r = false; + for(long i = 0; i < 4; ++i) + r ^= (p[i] != 0); + return r; +} diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vqi.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vqi.c new file mode 100644 index 00000000000..1707f15ce58 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vqi.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -O2" } */ +/* { dg-final { scan-assembler-times "vptest" 2 } } */ +/* { dg-final { scan-assembler-times "sete" 1 } } */ +/* { dg-final { scan-assembler-times "setne" 1 } } */ +/* { dg-final { scan-assembler-times "popcnt" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovmskb" 1 } } */ + +bool f(char * p, long n) +{ + bool r = true; + for(long i = 0; i < 32; ++i) + r &= (p[i] != 0); + return r; +} + +bool f2(char * p, long n) +{ + bool r = false; + for(long i = 0; i < 32; ++i) + r |= (p[i] != 0); + return r; +} + +bool f3(char * p, long n) +{ + bool r = false; + for(long i = 0; i < 32; ++i) + r ^= (p[i] != 0); + return r; +} diff --git 
a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vsi.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vsi.c new file mode 100644 index 00000000000..2d4a39f71c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_vsi.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -O2" } */ +/* { dg-final { scan-assembler-times "vptest" 2 } } */ +/* { dg-final { scan-assembler-times "sete" 1 } } */ +/* { dg-final { scan-assembler-times "setne" 1 } } */ +/* { dg-final { scan-assembler-times "popcnt" 1 } } */ +/* { dg-final { scan-assembler-times "vmovmskps" 1 } } */ + +bool f(int * p, long n) +{ + bool r = true; + for(long i = 0; i < 8; ++i) + r &= (p[i] != 0); + return r; +} + +bool f2(int * p, long n) +{ + bool r = false; + for(long i = 0; i < 8; ++i) + r |= (p[i] != 0); + return r; +} + +bool f3(int * p, long n) +{ + bool r = false; + for(long i = 0; i < 8; ++i) + r ^= (p[i] != 0); + return r; +}