The failures of the originally reported test case builtin-bitops-1.c
and of the associated test case pr108699.c added here show that the
current support for vector-mode parity is wrong on Power.
The hardware insns vprtyb[wdq] operate only on the least
significant bit of each byte per element, so they do not match
what the RTL opcode parity requires, yet the current implementation
wrongly expands parity with them directly.
This patch fixes the expansion by emitting one more insn, vpopcntb, first.
PR target/108699
gcc/ChangeLog:
* config/rs6000/altivec.md (*p9v_parity<mode>2): Rename to ...
(rs6000_vprtyb<mode>2): ... this.
* config/rs6000/rs6000-builtin.def (VPRTYBD): Replace parityv2di2 with
rs6000_vprtybv2di2.
(VPRTYBW): Replace parityv4si2 with rs6000_vprtybv4si2.
(VPRTYBQ): Replace parityv1ti2 with rs6000_vprtybv1ti2.
* config/rs6000/vector.md (parity<mode>2 with VEC_IP): Expand with
popcountv16qi2 and the corresponding rs6000_vprtyb<mode>2.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/p9-vparity.c: Add scan-assembler-not for vpopcntb
to distinguish parity byte from parity.
* gcc.target/powerpc/pr108699.c: New test.
(cherry picked from commit
cdd2d6643f7fef40e335a7027edfea7276cde608)
[(set_attr "type" "vecsimple")])
;; Vector parity
-(define_insn "*p9v_parity<mode>2"
- [(set (match_operand:VParity 0 "register_operand" "=v")
- (parity:VParity (match_operand:VParity 1 "register_operand" "v")))]
+(define_insn "rs6000_vprtyb<mode>2" ;; Named (no leading '*') so the vprtyb* builtins and the parity expander can reference it.
+ [(set (match_operand:VEC_IP 0 "register_operand" "=v")
+ (unspec:VEC_IP ;; unspec rather than (parity ...): vprtyb* only combines the least significant bit of each byte, which is not RTL parity.
+ [(match_operand:VEC_IP 1 "register_operand" "v")]
+ UNSPEC_PARITY))]
"TARGET_P9_VECTOR"
"vprtyb<wd> %0,%1"
[(set_attr "type" "vecsimple")])
BU_P9V_AV_1 (VCTZH, "vctzh", CONST, ctzv8hi2)
BU_P9V_AV_1 (VCTZW, "vctzw", CONST, ctzv4si2)
BU_P9V_AV_1 (VCTZD, "vctzd", CONST, ctzv2di2)
-BU_P9V_AV_1 (VPRTYBD, "vprtybd", CONST, parityv2di2)
-BU_P9V_AV_1 (VPRTYBQ, "vprtybq", CONST, parityv1ti2)
-BU_P9V_AV_1 (VPRTYBW, "vprtybw", CONST, parityv4si2)
+BU_P9V_AV_1 (VPRTYBD, "vprtybd", CONST, rs6000_vprtybv2di2) /* The parity-byte builtins use the rs6000_vprtyb<mode>2 insn, not the standard parity<mode>2 pattern.  */
+BU_P9V_AV_1 (VPRTYBQ, "vprtybq", CONST, rs6000_vprtybv1ti2)
+BU_P9V_AV_1 (VPRTYBW, "vprtybw", CONST, rs6000_vprtybv4si2)
/* ISA 3.0 vector overloaded 1 argument functions. */
BU_P9V_OVERLOAD_1 (VCTZ, "vctz")
(define_expand "parity<mode>2"
[(set (match_operand:VEC_IP 0 "register_operand")
(parity:VEC_IP (match_operand:VEC_IP 1 "register_operand")))]
- "TARGET_P9_VECTOR")
+ "TARGET_P9_VECTOR"
+{ /* Expand full parity in two steps: per-byte popcount, then parity-byte over the popcount result.  */
+ rtx op1 = gen_lowpart (V16QImode, operands[1]); /* Reinterpret the input vector as 16 bytes.  */
+ rtx res = gen_reg_rtx (V16QImode);
+ emit_insn (gen_popcountv16qi2 (res, op1)); /* Bit 0 of each result byte is now the parity of the matching input byte.  */
+ emit_insn (gen_rs6000_vprtyb<mode>2 (operands[0], /* vprtyb* combines the least significant bit of every byte in each element.  */
+ gen_lowpart (<MODE>mode, res)));
+
+ DONE;
+})
\f
;; Same size conversions
/* { dg-final { scan-assembler "vprtybd" } } */
/* { dg-final { scan-assembler "vprtybq" } } */
/* { dg-final { scan-assembler "vprtybw" } } */
+/* { dg-final { scan-assembler-not "vpopcntb" } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#define N 16 /* Element count shared by every array below.  */
+
+unsigned long long vals[N]; /* Working copy of inputs[], filled by init ().  */
+unsigned int res[N]; /* Parity results written by do_parity ().  */
+unsigned int expects[N] = {0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* expects[i] is the expected 64-bit parity of inputs[i].  */
+
+unsigned long long inputs[N] /* Only 14 initializers are listed; the last two elements are implicitly zero.  */
+ = {0x0000000000000000ULL, 0x0000000000000001ULL, 0x8000000000000000ULL,
+ 0x0000000000000002ULL, 0x4000000000000000ULL, 0x0000000100000000ULL,
+ 0x0000000080000000ULL, 0xa5a5a5a5a5a5a5a5ULL, 0x5a5a5a5a5a5a5a5aULL,
+ 0xcafecafe00000000ULL, 0x0000cafecafe0000ULL, 0x00000000cafecafeULL,
+ 0x8070600000000000ULL, 0xffffffffffffffffULL};
+
+__attribute__ ((noipa)) void /* noipa: GCC attribute that disables interprocedural optimizations between this function and its callers.  */
+init () /* Copy inputs[] into vals[], one element per iteration.  */
+{
+ for (int i = 0; i < N; i++)
+ vals[i] = inputs[i];
+}
+
+__attribute__ ((noipa)) void /* Kernel under test: store the 64-bit parity of each vals[] element into res[].  */
+do_parity () /* NOTE(review): with the dg-options of this test the loop is presumably vectorized through parity<mode>2 - confirm.  */
+{
+ for (int i = 0; i < N; i++)
+ res[i] = __builtin_parityll (vals[i]); /* 1 when an odd number of bits is set, otherwise 0.  */
+}
+
+int
+main (void) /* Fill vals[], compute res[], then compare every result against expects[].  */
+{
+ init ();
+ do_parity ();
+ for (int i = 0; i < N; i++)
+ if (res[i] != expects[i])
+ __builtin_abort(); /* Any mismatch terminates the program abnormally.  */
+
+ return 0;
+}
+