From 06129071b510e29b437d2644c16ca3505a912d31 Mon Sep 17 00:00:00 2001
From: liuhongt
Date: Mon, 20 Oct 2025 01:42:32 -0700
Subject: [PATCH] Simplify avx512 vector integer comparison when 2 operands are
 known equal

For comparison NEQ/LT/NLE, it's simplified to 0.
For comparison LE/EQ/NLT, it's simplified to (1u << nelt) - 1.

gcc/ChangeLog:

	PR target/122320
	* config/i386/sse.md (*<avx512>_cmp<mode>3_dup_op): New
	define_insn_and_split.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr122320-mask16.c: New test.
	* gcc.target/i386/pr122320-mask2.c: New test.
	* gcc.target/i386/pr122320-mask32.c: New test.
	* gcc.target/i386/pr122320-mask4.c: New test.
	* gcc.target/i386/pr122320-mask64.c: New test.
	* gcc.target/i386/pr122320-mask8.c: New test.
---
 gcc/config/i386/sse.md                        | 27 ++++++++++++++++
 .../gcc.target/i386/pr122320-mask16.c         | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask2.c          | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask32.c         | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask4.c          | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask64.c         | 32 +++++++++++++++++++
 .../gcc.target/i386/pr122320-mask8.c          | 32 +++++++++++++++++++
 7 files changed, 219 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask64.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr122320-mask8.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b28c8edb19..4ad17f67b9d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4632,6 +4632,33 @@
 	   UNSPEC_PCMP_ITER))]
   "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
 
+(define_insn_and_split "*<avx512>_cmp<mode>3_dup_op"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+	(unspec:<avx512fmaskmode>
+	  [(match_operand:VI1248_AVX512VLBW 1 "general_operand")
+	   (match_operand:VI1248_AVX512VLBW 2 "general_operand")
+	   (match_operand:SI 3 "<cmp_imm_predicate>")]
+	  UNSPEC_PCMP_ITER))]
+  "TARGET_AVX512F && ix86_pre_reload_split ()
+  && rtx_equal_p (operands[1], operands[2])"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (match_dup 4))]
+{
+  int cmp_imm = INTVAL (operands[3]);
+  rtx res = CONST0_RTX (<avx512fmaskmode>mode);
+  /* EQ/LE/NLT.  */
+  if (cmp_imm == 0 || cmp_imm == 2 || cmp_imm == 5)
+  {
+    int nelts = GET_MODE_NUNITS (<MODE>mode);
+    if (nelts >= 8)
+      res = CONSTM1_RTX (<avx512fmaskmode>mode);
+    else
+      res = gen_int_mode ((1u << nelts) - 1, QImode);
+  }
+  operands[4] = res;
+})
+
 (define_insn "*<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
 	(unspec:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask16.c b/gcc/testsuite/gcc.target/i386/pr122320-mask16.c
new file mode 100644
index 00000000000..2796d748d46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask16.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask16 dumpy_eq (__m512i vx){
+  return _mm512_cmp_epi32_mask (vx, vx, 0);
+}
+
+__mmask16 dumpy_lt (__m512i vx)
+{
+  return _mm512_cmp_epi32_mask (vx, vx, 1);
+}
+
+__mmask16 dumpy_le (__m512i vx){
+  return _mm512_cmp_epi32_mask (vx, vx, 2);
+}
+
+__mmask16 dumpy_ne (__m512i vx)
+{
+  return _mm512_cmp_epi32_mask (vx, vx, 4);
+}
+
+__mmask16 dumpy_nlt (__m512i vx)
+{
+  return _mm512_cmp_epi32_mask (vx, vx, 5);
+}
+
+__mmask16 dumpy_nle (__m512i vx){
+  return _mm512_cmp_epi32_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask2.c b/gcc/testsuite/gcc.target/i386/pr122320-mask2.c
new file mode 100644
index 00000000000..bcbc47aef5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask8 dumpy_eq (__m128i vx){
+  return _mm_cmp_epi64_mask (vx, vx, 0);
+}
+
+__mmask8 dumpy_lt (__m128i vx)
+{
+  return _mm_cmp_epi64_mask (vx, vx, 1);
+}
+
+__mmask8 dumpy_le (__m128i vx){
+  return _mm_cmp_epi64_mask (vx, vx, 2);
+}
+
+__mmask8 dumpy_ne (__m128i vx)
+{
+  return _mm_cmp_epi64_mask (vx, vx, 4);
+}
+
+__mmask8 dumpy_nlt (__m128i vx)
+{
+  return _mm_cmp_epi64_mask (vx, vx, 5);
+}
+
+__mmask8 dumpy_nle (__m128i vx){
+  return _mm_cmp_epi64_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask32.c b/gcc/testsuite/gcc.target/i386/pr122320-mask32.c
new file mode 100644
index 00000000000..d75c8b0dfac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask32.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask32 dumpy_eq (__m512i vx){
+  return _mm512_cmp_epi16_mask (vx, vx, 0);
+}
+
+__mmask32 dumpy_lt (__m512i vx)
+{
+  return _mm512_cmp_epi16_mask (vx, vx, 1);
+}
+
+__mmask32 dumpy_le (__m512i vx){
+  return _mm512_cmp_epi16_mask (vx, vx, 2);
+}
+
+__mmask32 dumpy_ne (__m512i vx)
+{
+  return _mm512_cmp_epi16_mask (vx, vx, 4);
+}
+
+__mmask32 dumpy_nlt (__m512i vx)
+{
+  return _mm512_cmp_epi16_mask (vx, vx, 5);
+}
+
+__mmask32 dumpy_nle (__m512i vx){
+  return _mm512_cmp_epi16_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask4.c b/gcc/testsuite/gcc.target/i386/pr122320-mask4.c
new file mode 100644
index 00000000000..7f2ec7d5f22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask4.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask8 dumpy_eq (__m256i vx){
+  return _mm256_cmp_epi64_mask (vx, vx, 0);
+}
+
+__mmask8 dumpy_lt (__m256i vx)
+{
+  return _mm256_cmp_epi64_mask (vx, vx, 1);
+}
+
+__mmask8 dumpy_le (__m256i vx){
+  return _mm256_cmp_epi64_mask (vx, vx, 2);
+}
+
+__mmask8 dumpy_ne (__m256i vx)
+{
+  return _mm256_cmp_epi64_mask (vx, vx, 4);
+}
+
+__mmask8 dumpy_nlt (__m256i vx)
+{
+  return _mm256_cmp_epi64_mask (vx, vx, 5);
+}
+
+__mmask8 dumpy_nle (__m256i vx){
+  return _mm256_cmp_epi64_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask64.c b/gcc/testsuite/gcc.target/i386/pr122320-mask64.c
new file mode 100644
index 00000000000..6a7ce5112c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask64.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask64 dumpy_eq (__m512i vx){
+  return _mm512_cmp_epi8_mask (vx, vx, 0);
+}
+
+__mmask64 dumpy_lt (__m512i vx)
+{
+  return _mm512_cmp_epi8_mask (vx, vx, 1);
+}
+
+__mmask64 dumpy_le (__m512i vx){
+  return _mm512_cmp_epi8_mask (vx, vx, 2);
+}
+
+__mmask64 dumpy_ne (__m512i vx)
+{
+  return _mm512_cmp_epi8_mask (vx, vx, 4);
+}
+
+__mmask64 dumpy_nlt (__m512i vx)
+{
+  return _mm512_cmp_epi8_mask (vx, vx, 5);
+}
+
+__mmask64 dumpy_nle (__m512i vx){
+  return _mm512_cmp_epi8_mask (vx, vx, 6);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr122320-mask8.c b/gcc/testsuite/gcc.target/i386/pr122320-mask8.c
new file mode 100644
index 00000000000..e724a68e7eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr122320-mask8.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpcmp" } } */
+
+#include <immintrin.h>
+
+__mmask8 dumpy_eq (__m512i vx){
+  return _mm512_cmp_epi64_mask (vx, vx, 0);
+}
+
+__mmask8 dumpy_lt (__m512i vx)
+{
+  return _mm512_cmp_epi64_mask (vx, vx, 1);
+}
+
+__mmask8 dumpy_le (__m512i vx){
+  return _mm512_cmp_epi64_mask (vx, vx, 2);
+}
+
+__mmask8 dumpy_ne (__m512i vx)
+{
+  return _mm512_cmp_epi64_mask (vx, vx, 4);
+}
+
+__mmask8 dumpy_nlt (__m512i vx)
+{
+  return _mm512_cmp_epi64_mask (vx, vx, 5);
+}
+
+__mmask8 dumpy_nle (__m512i vx){
+  return _mm512_cmp_epi64_mask (vx, vx, 6);
+}
-- 
2.47.3