]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Implement popcountti2 pattern [PR113042]
authorAndrew Pinski <quic_apinski@quicinc.com>
Fri, 16 Aug 2024 18:18:31 +0000 (11:18 -0700)
committerAndrew Pinski <quic_apinski@quicinc.com>
Wed, 21 Aug 2024 15:10:57 +0000 (08:10 -0700)
When CSSC is not enabled, a 128-bit popcount can be implemented
with a single vector (v16qi) cnt instruction followed by a reduction,
the same way the 64-bit popcount is currently implemented, instead of
being split into two 64-bit popcounts.

Changes since v1:
* v2: Make operand 0 be DImode instead of TImode and simplify.

Built and tested on aarch64-linux-gnu.

PR target/113042

gcc/ChangeLog:

* config/aarch64/aarch64.md (popcountti2): New define_expand.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/popcnt10.c: New test.
* gcc.target/aarch64/popcnt9.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
gcc/config/aarch64/aarch64.md
gcc/testsuite/gcc.target/aarch64/popcnt10.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/popcnt9.c [new file with mode: 0644]

index 12dcc16529ad8ede4578b7ce0f47d7ecac3a61b2..c54b29cd64b9e0dc6c6d12735049386ccedc5408 100644 (file)
     }
 })
 
+;; Expand a 128-bit population count when TARGET_SIMD is set and
+;; TARGET_CSSC is not.  Operand 1 is the TImode input; operand 0 receives
+;; the count in DImode.  The whole value is counted with one vector
+;; (V16QI) popcount followed by a sum reduction.
+(define_expand "popcountti2"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:TI 1 "register_operand")]
+  "TARGET_SIMD && !TARGET_CSSC"
+{
+  /* Two fresh V16QI pseudos: V holds the input viewed as sixteen bytes,
+     V1 holds the per-byte bit counts.  */
+  rtx v = gen_reg_rtx (V16QImode);
+  rtx v1 = gen_reg_rtx (V16QImode);
+  emit_move_insn (v, gen_lowpart (V16QImode, operands[1]));
+  /* Count the set bits of each byte.  */
+  emit_insn (gen_popcountv16qi2 (v1, v));
+  /* Sum the sixteen per-byte counts and zero-extend the sum into the
+     DImode result.  The total is at most 16 * 8 = 128, so it fits in a
+     single byte.  */
+  emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v16qi (operands[0], v1));
+  /* Everything has been emitted by hand above.  */
+  DONE;
+})
+
 (define_insn "clrsb<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt10.c b/gcc/testsuite/gcc.target/aarch64/popcnt10.c
new file mode 100644 (file)
index 0000000..4d01fc6
--- /dev/null
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+cssc"
+
+/*
+** h128:
+**     ldp     x([0-9]+), x([0-9]+), \[x0\]
+**     cnt     x([0-9]+), x([0-9]+)
+**     cnt     x([0-9]+), x([0-9]+)
+**     add     w0, w([0-9]+), w([0-9]+)
+**     ret
+*/
+
+
+unsigned h128 (const unsigned __int128 *a) {
+  return __builtin_popcountg (a[0]); /* Popcount of the 128-bit value.  */
+}
+
+/* With CSSC, the 128-bit popcount should be split into two 64-bit popcounts. */
+/* { dg-final { scan-tree-dump-not "POPCOUNT " "optimized" } } */
+/* { dg-final { scan-tree-dump-times " __builtin_popcount" 2 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt9.c b/gcc/testsuite/gcc.target/aarch64/popcnt9.c
new file mode 100644 (file)
index 0000000..c778fc7
--- /dev/null
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** h128:
+**     ldr     q([0-9]+), \[x0\]
+**     cnt     v([0-9]+).16b, v\1.16b
+**     addv    b([0-9]+), v\2.16b
+**     fmov    w0, s\3
+**     ret
+*/
+
+
+unsigned h128 (const unsigned __int128 *a) {
+         return __builtin_popcountg (a[0]); /* Popcount of the 128-bit value.  */
+}
+
+/* There should be only one POPCOUNT. */
+/* { dg-final { scan-tree-dump-times "POPCOUNT " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not " __builtin_popcount"  "optimized" } } */
+