From: Richard Biener Date: Wed, 24 May 2023 08:07:36 +0000 (+0200) Subject: target/109944 - avoid STLF fail for V16QImode CTOR expansion X-Git-Tag: basepoints/gcc-15~8973 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=affee7dcfa1ee272d43ac7cb68cf423dbd956fd8;p=thirdparty%2Fgcc.git target/109944 - avoid STLF fail for V16QImode CTOR expansion The following dispatches to V2DImode CTOR expansion instead of using sets of (subreg:DI (reg:V16QI 146) [08]) which causes LRA to spill DImode and reload V16QImode. The same applies for V8QImode or V4HImode construction from SImode parts which happens during 32bit libgcc build. PR target/109944 * config/i386/i386-expand.cc (ix86_expand_vector_init_general): Perform final vector composition using ix86_expand_vector_init_general instead of setting the highpart and lowpart which causes spilling. * gcc.target/i386/pr109944-1.c: New testcase. * gcc.target/i386/pr109944-2.c: Likewise. --- diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index ff3d382f1b40..19acd9c01f99 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -16367,11 +16367,12 @@ quarter: emit_move_insn (target, gen_lowpart (mode, words[0])); else if (n_words == 2) { - rtx tmp = gen_reg_rtx (mode); - emit_clobber (tmp); - emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]); - emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]); - emit_move_insn (target, tmp); + gcc_assert (tmp_mode == DImode || tmp_mode == SImode); + machine_mode concat_mode = tmp_mode == DImode ? 
V2DImode : V2SImode; + rtx tmp = gen_reg_rtx (concat_mode); + vals = gen_rtx_PARALLEL (concat_mode, gen_rtvec_v (2, words)); + ix86_expand_vector_init_general (false, concat_mode, tmp, vals); + emit_move_insn (target, gen_lowpart (mode, tmp)); } else if (n_words == 4) { diff --git a/gcc/testsuite/gcc.target/i386/pr109944-1.c b/gcc/testsuite/gcc.target/i386/pr109944-1.c new file mode 100644 index 000000000000..d82214d9ebcd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109944-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void foo (char * __restrict a, char *b) +{ + a[0] = b[0]; + a[1] = b[16]; + a[2] = b[32]; + a[3] = b[48]; + a[4] = b[64]; + a[5] = b[80]; + a[6] = b[96]; + a[7] = b[112]; + a[8] = b[128]; + a[9] = b[144]; + a[10] = b[160]; + a[11] = b[176]; + a[12] = b[192]; + a[13] = b[208]; + a[14] = b[224]; + a[15] = b[240]; +} + +/* We do not want to generate a spill/reload when the store is vectorized. + movq %rdx, -24(%rsp) +... + movq %rax, -16(%rsp) + movdqa -24(%rsp), %xmm0 + movups %xmm0, (%rdi) */ +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109944-2.c b/gcc/testsuite/gcc.target/i386/pr109944-2.c new file mode 100644 index 000000000000..318dfab02500 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109944-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef char v16qi __attribute__((vector_size(16))); +v16qi foo (char *b) +{ + return (v16qi){ b[0], b[16], b[32], b[48], b[64], b[80], b[96], b[112], + b[128], b[144], b[160], b[176], b[192], b[208], b[224], b[240] }; +} + +/* We do not want to generate a spill/reload + movq %rdx, -24(%rsp) +... + movq %rax, -16(%rsp) + movdqa -24(%rsp), %xmm0 + movups %xmm0, (%rdi) */ +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */