From d88e488ec9321e44291fcaf2a3b14333f64aac01 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Mon, 3 Apr 2023 21:47:44 -0500 Subject: [PATCH] rs6000: Fix vector parity support [PR108699] The failures on the original failed case builtin-bitops-1.c and the associated test case pr108699.c here show that the current support of parity vector mode is wrong on Power. The hardware insns vprtyb[wdq] operate on the least significant bit of each byte per element, so they don't match what the RTL opcode parity needs, but the current implementation wrongly expands it with them. This patch fixes the handling with one more insn, vpopcntb. PR target/108699 gcc/ChangeLog: * config/rs6000/altivec.md (*p9v_parity<mode>2): Rename to ... (rs6000_vprtyb<mode>2): ... this. * config/rs6000/rs6000-builtin.def (VPRTYBD): Replace parityv2di2 with rs6000_vprtybv2di2. (VPRTYBW): Replace parityv4si2 with rs6000_vprtybv4si2. (VPRTYBQ): Replace parityv1ti2 with rs6000_vprtybv1ti2. * config/rs6000/vector.md (parity<mode>2 with VEC_IP): Expand with popcountv16qi2 and the corresponding rs6000_vprtyb<mode>2. gcc/testsuite/ChangeLog: * gcc.target/powerpc/p9-vparity.c: Add scan-assembler-not for vpopcntb to distinguish parity byte from parity. * gcc.target/powerpc/pr108699.c: New test. 
(cherry picked from commit cdd2d6643f7fef40e335a7027edfea7276cde608) --- gcc/config/rs6000/altivec.md | 8 ++-- gcc/config/rs6000/rs6000-builtin.def | 6 +-- gcc/config/rs6000/vector.md | 11 ++++- gcc/testsuite/gcc.target/powerpc/p9-vparity.c | 1 + gcc/testsuite/gcc.target/powerpc/pr108699.c | 42 +++++++++++++++++++ 5 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108699.c diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 844978544a0d..b43b5edf027a 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -3916,9 +3916,11 @@ [(set_attr "type" "vecsimple")]) ;; Vector parity -(define_insn "*p9v_parity2" - [(set (match_operand:VParity 0 "register_operand" "=v") - (parity:VParity (match_operand:VParity 1 "register_operand" "v")))] +(define_insn "rs6000_vprtyb2" + [(set (match_operand:VEC_IP 0 "register_operand" "=v") + (unspec:VEC_IP + [(match_operand:VEC_IP 1 "register_operand" "v")] + UNSPEC_PARITY))] "TARGET_P9_VECTOR" "vprtyb %0,%1" [(set_attr "type" "vecsimple")]) diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index b3ae6fea6402..36ba145ad3d2 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2627,9 +2627,9 @@ BU_P9V_AV_1 (VCTZB, "vctzb", CONST, ctzv16qi2) BU_P9V_AV_1 (VCTZH, "vctzh", CONST, ctzv8hi2) BU_P9V_AV_1 (VCTZW, "vctzw", CONST, ctzv4si2) BU_P9V_AV_1 (VCTZD, "vctzd", CONST, ctzv2di2) -BU_P9V_AV_1 (VPRTYBD, "vprtybd", CONST, parityv2di2) -BU_P9V_AV_1 (VPRTYBQ, "vprtybq", CONST, parityv1ti2) -BU_P9V_AV_1 (VPRTYBW, "vprtybw", CONST, parityv4si2) +BU_P9V_AV_1 (VPRTYBD, "vprtybd", CONST, rs6000_vprtybv2di2) +BU_P9V_AV_1 (VPRTYBQ, "vprtybq", CONST, rs6000_vprtybv1ti2) +BU_P9V_AV_1 (VPRTYBW, "vprtybw", CONST, rs6000_vprtybv4si2) /* ISA 3.0 vector overloaded 1 argument functions. 
*/ BU_P9V_OVERLOAD_1 (VCTZ, "vctz") diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 62c60956f4da..68fa65520889 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -1161,7 +1161,16 @@ (define_expand "parity2" [(set (match_operand:VEC_IP 0 "register_operand") (parity:VEC_IP (match_operand:VEC_IP 1 "register_operand")))] - "TARGET_P9_VECTOR") + "TARGET_P9_VECTOR" +{ + rtx op1 = gen_lowpart (V16QImode, operands[1]); + rtx res = gen_reg_rtx (V16QImode); + emit_insn (gen_popcountv16qi2 (res, op1)); + emit_insn (gen_rs6000_vprtyb2 (operands[0], + gen_lowpart (mode, res))); + + DONE; +}) ;; Same size conversions diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vparity.c b/gcc/testsuite/gcc.target/powerpc/p9-vparity.c index bd6c83d1b3be..1ebf0fc420e2 100644 --- a/gcc/testsuite/gcc.target/powerpc/p9-vparity.c +++ b/gcc/testsuite/gcc.target/powerpc/p9-vparity.c @@ -104,3 +104,4 @@ parity_ti_4u (__uint128_t a) /* { dg-final { scan-assembler "vprtybd" } } */ /* { dg-final { scan-assembler "vprtybq" } } */ /* { dg-final { scan-assembler "vprtybw" } } */ +/* { dg-final { scan-assembler-not "vpopcntb" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr108699.c b/gcc/testsuite/gcc.target/powerpc/pr108699.c new file mode 100644 index 000000000000..f02bac130cc7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108699.c @@ -0,0 +1,42 @@ +/* { dg-run } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */ + +#define N 16 + +unsigned long long vals[N]; +unsigned int res[N]; +unsigned int expects[N] = {0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +unsigned long long inputs[N] + = {0x0000000000000000ULL, 0x0000000000000001ULL, 0x8000000000000000ULL, + 0x0000000000000002ULL, 0x4000000000000000ULL, 0x0000000100000000ULL, + 0x0000000080000000ULL, 0xa5a5a5a5a5a5a5a5ULL, 0x5a5a5a5a5a5a5a5aULL, + 0xcafecafe00000000ULL, 0x0000cafecafe0000ULL, 0x00000000cafecafeULL, + 0x8070600000000000ULL, 
0xffffffffffffffffULL}; + +__attribute__ ((noipa)) void +init () +{ + for (int i = 0; i < N; i++) + vals[i] = inputs[i]; +} + +__attribute__ ((noipa)) void +do_parity () +{ + for (int i = 0; i < N; i++) + res[i] = __builtin_parityll (vals[i]); +} + +int +main (void) +{ + init (); + do_parity (); + for (int i = 0; i < N; i++) + if (res[i] != expects[i]) + __builtin_abort(); + + return 0; +} + -- 2.47.2