From: Prathamesh Kulkarni
Date: Tue, 6 Dec 2022 01:21:14 +0000 (+0530)
Subject: aarch64: Use dup and zip1 for interleaving elements in vector initializer.
X-Git-Tag: basepoints/gcc-14~2710
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=769370f3e2e04823c8a621d8ffa756dd83ebf21e;p=thirdparty%2Fgcc.git

aarch64: Use dup and zip1 for interleaving elements in vector initializer.

gcc/ChangeLog:
	* config/aarch64/aarch64.cc (aarch64_expand_vector_init): Use dup
	and zip1 for interleaving elements in vector initializer.

gcc/testsuite/ChangeLog:
	* gcc.target/aarch64/interleave-init-1.c: New test.
---

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e97f3b32f7c7..dedda68261dc 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22058,6 +22058,38 @@ aarch64_expand_vector_init (rtx target, rtx vals)
       return;
     }
 
+  /* Check for interleaving case.
+     For eg if initializer is (int16x8_t) {x, y, x, y, x, y, x, y}.
+     Generate following code:
+     dup v0.h, x
+     dup v1.h, y
+     zip1 v0.h, v0.h, v1.h
+     for "large enough" initializer.  */
+
+  if (n_elts >= 8)
+    {
+      int i;
+      for (i = 2; i < n_elts; i++)
+        if (!rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, i % 2)))
+          break;
+
+      if (i == n_elts)
+        {
+          machine_mode mode = GET_MODE (target);
+          rtx dest[2];
+
+          for (int i = 0; i < 2; i++)
+            {
+              rtx x = expand_vector_broadcast (mode, XVECEXP (vals, 0, i));
+              dest[i] = force_reg (mode, x);
+            }
+
+          rtvec v = gen_rtvec (2, dest[0], dest[1]);
+          emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
+          return;
+        }
+    }
+
   enum insn_code icode = optab_handler (vec_set_optab, mode);
   gcc_assert (icode != CODE_FOR_nothing);
 
diff --git a/gcc/testsuite/gcc.target/aarch64/interleave-init-1.c b/gcc/testsuite/gcc.target/aarch64/interleave-init-1.c
new file mode 100644
index 000000000000..ee775048589d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/interleave-init-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** foo:
+**	...
+**	dup	v[0-9]+\.8h, w[0-9]+
+**	dup	v[0-9]+\.8h, w[0-9]+
+**	zip1	v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**	...
+**	ret
+*/
+
+int16x8_t foo(int16_t x, int y)
+{
+  int16x8_t v = (int16x8_t) {x, y, x, y, x, y, x, y};
+  return v;
+}
+
+/*
+** foo2:
+**	...
+**	dup	v[0-9]+\.8h, w[0-9]+
+**	movi	v[0-9]+\.8h, 0x1
+**	zip1	v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**	...
+**	ret
+*/
+
+int16x8_t foo2(int16_t x)
+{
+  int16x8_t v = (int16x8_t) {x, 1, x, 1, x, 1, x, 1};
+  return v;
+}