From 1b7bcac0327ccd84f1966c748f4d1aedef64a9c5 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Thu, 3 Jul 2025 09:45:02 -0700 Subject: [PATCH] aarch64: Handle DImode BCAX operations To handle DImode BCAX operations we want to do them on the SIMD side only if the incoming arguments don't require a cross-bank move. This means we need to split back the combination to separate GP BIC+EOR instructions if the operands are expected to be in GP regs through reload. The split happens pre-reload if we already know that the destination will be a GP reg. Otherwise, if reload decides to use the "=r,r" alternative, we ensure operand 0 is early-clobber. This scheme is similar to how we handle the BSL operations elsewhere in aarch64-simd.md. Thus, for the functions: uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, b, c); } uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return BCAX (a, b, c); } we now generate the desired: bcax_d_gp: bic x1, x1, x2 eor x0, x1, x0 ret bcax_d: bcax v0.16b, v0.16b, v1.16b, v2.16b ret When the inputs are in SIMD regs we use BCAX and when they are in GP regs we don't force them to SIMD with extra moves. Bootstrapped and tested on aarch64-none-linux-gnu. Signed-off-by: Kyrylo Tkachov gcc/ * config/aarch64/aarch64-simd.md (*bcaxqdi4): New define_insn_and_split. gcc/testsuite/ * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode arguments. 
--- gcc/config/aarch64/aarch64-simd.md | 29 +++++++++++++++++++ .../gcc.target/aarch64/simd/bcax_d.c | 6 +++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 4493e55603d..270cb2ff3a1 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -9252,6 +9252,35 @@ [(set_attr "type" "crypto_sha3")] ) +(define_insn_and_split "*bcaxqdi4" + [(set (match_operand:DI 0 "register_operand") + (xor:DI + (and:DI + (not:DI (match_operand:DI 3 "register_operand")) + (match_operand:DI 2 "register_operand")) + (match_operand:DI 1 "register_operand")))] + "TARGET_SHA3" + {@ [ cons: =0, 1, 2 , 3 ; attrs: type ] + [ w , w, w , w ; crypto_sha3 ] bcax\t%0.16b, %1.16b, %2.16b, %3.16b + [ &r , r, r0, r0 ; multiple ] # + } + "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" + [(set (match_dup 4) + (and:DI (not:DI (match_dup 3)) + (match_dup 2))) + (set (match_dup 0) + (xor:DI (match_dup 4) + (match_dup 1)))] + { + if (reload_completed) + operands[4] = operands[0]; + else if (can_create_pseudo_p ()) + operands[4] = gen_reg_rtx (DImode); + else + FAIL; + } +) + ;; SM3 (define_insn "aarch64_sm3ss1qv4si" diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c b/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c index d68f0e102bf..a7640c3f6f1 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c @@ -7,9 +7,13 @@ #define BCAX(x,y,z) ((x) ^ ((y) & ~(z))) +/* When the inputs come from GP regs don't form a BCAX. 
*/ +uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, b, c); } + +uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return BCAX (a, b, c); } uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return BCAX (a, b, c); } uint16x4_t bcax_h (uint16x4_t a, uint16x4_t b, uint16x4_t c) { return BCAX (a, b, c); } uint8x8_t bcax_b (uint8x8_t a, uint8x8_t b, uint8x8_t c) { return BCAX (a, b, c); } -/* { dg-final { scan-assembler-times {bcax\tv0.16b, v0.16b, v1.16b, v2.16b} 3 } } */ +/* { dg-final { scan-assembler-times {bcax\tv0.16b, v0.16b, v1.16b, v2.16b} 4 } } */ -- 2.47.2