git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Also handle vptestnm + and15/and3 to just vptestnm.
author liuhongt <hongtao.liu@intel.com>
Fri, 14 Nov 2025 03:59:08 +0000 (19:59 -0800)
committer liuhongt <hongtao.liu@intel.com>
Tue, 18 Nov 2025 02:44:12 +0000 (18:44 -0800)
r16-1298-gcdfa5fe03512f7 optimizes vpcmp + and15/and3 to vpcmp when VF
is 2 or 4. vptestnm is a variant of vpcmpeq which accepts
nonimm_or_0_operand. This patch handles that variant as well.

gcc/ChangeLog:

PR target/103750
* config/i386/sse.md (*<avx512>_eq<mode>3_and15): New
define_insn.
(*avx512vl_eqv2di_and3): Ditto.
* config/i386/i386.md (*ior<mode>_ccz_1): Fix the typo in the
comments above.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512vl-pr103750-2.c: New test.

gcc/config/i386/i386.md
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c [new file with mode: 0644]

index b925a037b2df3d96438db2b003ca6228257ddb8b..6af7dcfcdd3232852ce941a36178c976b90e8619 100644 (file)
    (set_attr "isa" "*,apx_ndd")
    (set_attr "mode" "SI")])
 
-;; It must be put before *<code><mode>_3, the blow one.
+;; It must be put before *<code><mode>_3, the one below.
 (define_insn "*ior<mode>_ccz_1"
   [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
index 7d91585b05d13ac2089093409b0ddad6edd0f3ca..8b90845260a397bba47e5b10cf405d97785d7c14 100644 (file)
           UNSPEC_PCMP))]
   "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
 
+(define_int_iterator UNSPEC_PCMP_ITER  ;; Int iterator: patterns that name it are instantiated once per unspec listed below.
+  [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
+
 (define_insn "*<avx512>_cmp<mode>3_and15"
   [(set (match_operand:QI 0 "register_operand" "=k")
        (and:QI
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "*<avx512>_eq<mode>3_and15"  ;; Compare with selector 0 into a mask register, with the QImode result ANDed with 15.
+  [(set (match_operand:QI 0 "register_operand" "=k, k")
+       (and:QI
+         (unspec:QI
+           [(match_operand:VI48_AVX512VL_4 1 "nonimm_or_0_operand" "%v, v")
+            (match_operand:VI48_AVX512VL_4 2 "nonimm_or_0_operand" "vm, C")  ;; Alternative 1 uses constraint C (presumably the all-zero constant -- confirm); its template prints operand 1 twice and never prints operand 2.
+            (const_int 0)]  ;; Comparison selector is fixed at 0; alternative 0 emits vpcmpeq.
+           UNSPEC_PCMP_ITER)  ;; Expands to UNSPEC_PCMP and UNSPEC_UNSIGNED_PCMP.
+         (const_int 15)))]  ;; NOTE(review): the AND is presumably redundant for a 4-element compare, which is why no separate mask AND is emitted -- confirm.
+  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"  ;; Do not match when both inputs are memory operands.
+  "@
+   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+   vptestnm<ssemodesuffix>\t{%1, %1, %0|%0, %1, %1}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "*<avx512>_cmp<mode>3_and3"
   [(set (match_operand:QI 0 "register_operand" "=k")
        (and:QI
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
+(define_insn "*avx512vl_eqv2di_and3"  ;; V2DI compare with selector 0 into a mask register, with the QImode result ANDed with 3.
+  [(set (match_operand:QI 0 "register_operand" "=k, k")
+       (and:QI
+         (unspec:QI
+           [(match_operand:V2DI 1 "nonimm_or_0_operand" "%v, v")
+            (match_operand:V2DI 2 "nonimm_or_0_operand" "vm, C")  ;; Alternative 1 uses constraint C (presumably the all-zero constant -- confirm); its template prints operand 1 twice and never prints operand 2.
+            (const_int 0)]  ;; Comparison selector is fixed at 0; alternative 0 emits vpcmpeqq.
+           UNSPEC_PCMP_ITER)  ;; Expands to UNSPEC_PCMP and UNSPEC_UNSIGNED_PCMP.
+         (const_int 3)))]  ;; NOTE(review): the AND is presumably redundant for a 2-element compare, which is why no separate mask AND is emitted -- confirm.
+  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"  ;; Do not match when both inputs are memory operands.
+  "@
+   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}
+   vptestnmq\t{%1, %1, %0|%0, %1, %1}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
        (unspec:<avx512fmaskmode>
    (set_attr "prefix" "evex")
    (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
 
-(define_int_iterator UNSPEC_PCMP_ITER
-  [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
-
 (define_insn_and_split "*<avx512>_cmp<mode>3"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (not:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c
new file mode 100644 (file)
index 0000000..7c6e77b
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -mprefer-vector-width=128 -O3" } */
+/* { dg-final { scan-assembler "kortest" } } */
+/* { dg-final { scan-assembler-not "kmov" } } */
+
+int
+foo (int *__restrict a)  /* Returns 1 if any of a[0] .. a[99] equals zero, otherwise 0.  */
+{
+  for (int i = 0; i != 100; i++)  /* Fixed trip count of 100; keep this loop shape, the dg-final directives scan the generated assembly.  */
+    if (a[i] == 0)  /* Compare against zero with an early exit on the first match.  */
+      return 1;
+  return 0;
+}