]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
rs6000: Fix vector parity support [PR108699]
author: Kewen Lin <linkw@linux.ibm.com>
Tue, 4 Apr 2023 02:47:44 +0000 (21:47 -0500)
committer: Kewen Lin <linkw@linux.ibm.com>
Tue, 4 Apr 2023 05:11:54 +0000 (00:11 -0500)
The failures on the originally failing case builtin-bitops-1.c
and the associated test case pr108699.c here show that the
current support of parity vector mode is wrong on Power.
The hardware insns vprtyb[wdq] operate only on the least
significant bit of each byte per element, which doesn't match
what the RTL opcode parity needs, but the current implementation
wrongly expands parity with them.

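This patch fixes the handling by first emitting one more insn,
vpopcntb, so that the least significant bit of each byte holds that
byte's parity before vprtyb[wdq] combines the bytes.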

PR target/108699

gcc/ChangeLog:

* config/rs6000/altivec.md (*p9v_parity<mode>2): Rename to ...
(rs6000_vprtyb<mode>2): ... this.
* config/rs6000/rs6000-builtins.def (VPRTYBD): Replace parityv2di2 with
rs6000_vprtybv2di2.
(VPRTYBW): Replace parityv4si2 with rs6000_vprtybv4si2.
(VPRTYBQ): Replace parityv1ti2 with rs6000_vprtybv1ti2.
* config/rs6000/vector.md (parity<mode>2 with VEC_IP): Expand with
popcountv16qi2 and the corresponding rs6000_vprtyb<mode>2.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/p9-vparity.c: Add scan-assembler-not for vpopcntb
to distinguish parity byte from parity.
* gcc.target/powerpc/pr108699.c: New test.

gcc/config/rs6000/altivec.md
gcc/config/rs6000/rs6000-builtins.def
gcc/config/rs6000/vector.md
gcc/testsuite/gcc.target/powerpc/p9-vparity.c
gcc/testsuite/gcc.target/powerpc/pr108699.c [new file with mode: 0644]

index 30606b8ab21294428321414483e6f8e600d32a23..49b0c964f4da76120b35380ad8fbf77c61747125 100644 (file)
   [(set_attr "type" "vecsimple")])
 
 ;; Vector parity
-(define_insn "*p9v_parity<mode>2"
-  [(set (match_operand:VParity 0 "register_operand" "=v")
-        (parity:VParity (match_operand:VParity 1 "register_operand" "v")))]
+(define_insn "rs6000_vprtyb<mode>2"
+  [(set (match_operand:VEC_IP 0 "register_operand" "=v")
+        (unspec:VEC_IP
+          [(match_operand:VEC_IP 1 "register_operand" "v")]
+          UNSPEC_PARITY))]
   "TARGET_P9_VECTOR"
   "vprtyb<wd> %0,%1"
   [(set_attr "type" "vecsimple")])
index e0d9f5adc97d58b3339e1ea642c80af0b72241c3..03fb194b15108c5cfd7d1c1b4219f2fc34386ff5 100644 (file)
     VMSUMUDM altivec_vmsumudm {}
 
   const vsll __builtin_altivec_vprtybd (vsll);
-    VPRTYBD parityv2di2 {}
+    VPRTYBD rs6000_vprtybv2di2 {}
 
   const vsq __builtin_altivec_vprtybq (vsq);
-    VPRTYBQ parityv1ti2 {}
+    VPRTYBQ rs6000_vprtybv1ti2 {}
 
   const vsi __builtin_altivec_vprtybw (vsi);
-    VPRTYBW parityv4si2 {}
+    VPRTYBW rs6000_vprtybv4si2 {}
 
   const vsll __builtin_altivec_vrldmi (vsll, vsll, vsll);
     VRLDMI altivec_vrldmi {}
index 12fd5f976ed20846885639daae052d39dce54f01..1ae04c8e0a801bfe8ab7caeff9b465467e8ae62b 100644 (file)
 (define_expand "parity<mode>2"
   [(set (match_operand:VEC_IP 0 "register_operand")
        (parity:VEC_IP (match_operand:VEC_IP 1 "register_operand")))]
-  "TARGET_P9_VECTOR")
+  "TARGET_P9_VECTOR"
+{
+  rtx op1 = gen_lowpart (V16QImode, operands[1]);
+  rtx res = gen_reg_rtx (V16QImode);
+  emit_insn (gen_popcountv16qi2 (res, op1));
+  emit_insn (gen_rs6000_vprtyb<mode>2 (operands[0],
+                                      gen_lowpart (<MODE>mode, res)));
+
+  DONE;
+})
 
 \f
 ;; Same size conversions
index f4aba1567cd09d6761c8df62bec951480d6f7620..8f6f1239f7a006af2b023edd4e83508077ff7d94 100644 (file)
@@ -105,3 +105,4 @@ parity_ti_4u (__uint128_t a)
 /* { dg-final { scan-assembler "vprtybd" } } */
 /* { dg-final { scan-assembler "vprtybq" } } */
 /* { dg-final { scan-assembler "vprtybw" } } */
+/* { dg-final { scan-assembler-not "vpopcntb" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108699.c b/gcc/testsuite/gcc.target/powerpc/pr108699.c
new file mode 100644 (file)
index 0000000..f02bac1
--- /dev/null
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#define N 16  /* Number of elements in every array of this test.  */
+
+unsigned long long vals[N];  /* Working copy of inputs[], filled by init.  */
+unsigned int res[N];  /* Per-element results written by do_parity.  */
+unsigned int expects[N] = {0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};  /* Expected parity of each inputs[] entry, compared against res[] in main.  */
+
+unsigned long long inputs[N]
+  = {0x0000000000000000ULL, 0x0000000000000001ULL, 0x8000000000000000ULL,
+     0x0000000000000002ULL, 0x4000000000000000ULL, 0x0000000100000000ULL,
+     0x0000000080000000ULL, 0xa5a5a5a5a5a5a5a5ULL, 0x5a5a5a5a5a5a5a5aULL,
+     0xcafecafe00000000ULL, 0x0000cafecafe0000ULL, 0x00000000cafecafeULL,
+     0x8070600000000000ULL, 0xffffffffffffffffULL};  /* Only 14 initializers: the last 2 elements are zero-initialized, matching the trailing 0s in expects[].  */
+
+__attribute__ ((noipa)) void  /* NOTE(review): noipa presumably keeps the stored values opaque to do_parity -- confirm.  */
+init ()  /* Copy all N entries of inputs[] into vals[].  */
+{
+  for (int i = 0; i < N; i++)
+    vals[i] = inputs[i];
+}
+
+__attribute__ ((noipa)) void
+do_parity ()  /* Store __builtin_parityll of each vals[i] into res[i].  */
+{
+  for (int i = 0; i < N; i++)  /* Presumably the loop the dg-options above are meant to get vectorized -- confirm.  */
+    res[i] = __builtin_parityll (vals[i]);
+}
+
+int
+main (void)  /* Run init and do_parity, then abort on the first res[]/expects[] mismatch; return 0 otherwise.  */
+{
+  init ();
+  do_parity ();
+  for (int i = 0; i < N; i++)
+    if (res[i] != expects[i])
+      __builtin_abort();
+
+  return 0;
+}
+