From: Tamar Christina Date: Fri, 3 Jul 2026 08:15:01 +0000 (+0100) Subject: middle-end: Handle variable-length vector types in store_constructor X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7e3889b7a7af7aea7549cecaa463bb10ab6eb75f;p=thirdparty%2Fgcc.git middle-end: Handle variable-length vector types in store_constructor Currently vec_init does not support VLA vectors and we instead fall back to storing piecewise through memory. However, there are no defined semantics for this. This patch adds the semantics that for VLA constructors the vector has to be cleared with zero before being constructed piecewise from scalar elements. This means unspecified elements are initialized to zero. Without this patch, #include <arm_sve.h> svint32_t __attribute__ ((noipa)) func_init4 (int32_t a, int32_t b, int32_t c) { svint32_t temp = {a, b, c}; return temp; } compiles to: func_init4: addvl sp, sp, #-3 movi d30, #0 str z30, [sp, #2, mul vl] addvl x3, sp, #2 str w0, [x3] addvl x0, sp, #1 add x0, x0, 4 ldr z31, [sp, #2, mul vl] str z31, [sp, #1, mul vl] str w1, [x0] ldr z31, [sp, #1, mul vl] str z31, [sp] str w2, [sp, 8] ldr z0, [sp] addvl sp, sp, #3 ret and with the patch: func_init4: fmov s0, w2 fmov s0, s0 insr z0.s, w1 insr z0.s, w0 ret Note that this is still not optimal, as the fmov s0, s0 that's doing the zeroing of the vector is not actually needed since the transfer instruction fmov s0, w2 already zeroes the destination SVE register. But this is an AArch64 deficiency that will be dealt with in the backend. The optimal codegen here is: func_init4: orr x1, x1, x2, lsl 32 fmov d0, x1 insr z0.s, w0 ret Co-Authored-By: Chris Bazley gcc/ChangeLog: * expr.cc (store_constructor): Handle VLA vec_init support and generic fall through piecewise copy. * doc/md.texi: Document change. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/copsi.c: New test. 
--- diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 5ad4a201273..a98a572cc01 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -7557,7 +7557,9 @@ Initialize the vector to given values. Operand 0 is the vector to initialize and operand 1 is parallel containing values for individual fields. The @var{n} mode is the mode of the elements, should be either element mode of the vector mode @var{m}, or a vector mode with the same element mode and -smaller number of elements. +smaller number of elements. If @var{m} specifies a scalable vector mode, +then operand 1 only specifies the minimum number of elements implied +by @var{m} and elements beyond are zero initialized. @mdindex vec_duplicate@var{m} @item @samp{vec_duplicate@var{m}} diff --git a/gcc/expr.cc b/gcc/expr.cc index 469c4d013b4..f42aab21272 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -7502,11 +7502,14 @@ fields_length (const_tree type) return count; } - /* Store the value of constructor EXP into the rtx TARGET. TARGET is either a REG or a MEM; we know it cannot conflict, since safe_from_p has been called. CLEARED is true if TARGET is known to have been zero'd. + If the constructor EXP has a vector type then elements of TARGET for which + there is no corresponding element in EXP are zero'd. For a variable-length + vector type, only elements up to the minimum number of subparts of the type + are explicitly zero'd; any elements beyond that are implicitly zero. SIZE is the number of bytes of TARGET we are allowed to modify: this may not be the same as the size of EXP if we are assigning to a field which has been packed to exclude padding bits. @@ -8079,14 +8082,22 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size, similarly non-const type vectors. */ icode = convert_optab_handler (vec_init_optab, mode, eltmode); } + else + { + /* Handle variable-length vector types. 
*/ + icode = convert_optab_handler (vec_init_optab, mode, eltmode); + const_n_elts = constant_lower_bound (n_elts); + } - if (const_n_elts && icode != CODE_FOR_nothing) - { - vector = rtvec_alloc (const_n_elts); - for (unsigned int k = 0; k < const_n_elts; k++) - RTVEC_ELT (vector, k) = CONST0_RTX (eltmode); - } + if (const_n_elts && icode != CODE_FOR_nothing) + { + vector = rtvec_alloc (const_n_elts); + for (unsigned int k = 0; k < const_n_elts; k++) + RTVEC_ELT (vector, k) = CONST0_RTX (eltmode); + } } + else + gcc_assert (n_elts.is_constant ()); /* Compute the size of the elements in the CTOR. It differs from the size of the vector type elements only when the diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copsi.c b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c new file mode 100644 index 00000000000..d85403640b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include + +/* +** func_init4: +** mov z0\.d, x1 +** insr z0\.d, x0 +** ret +*/ +svint64_t __attribute__ ((noipa)) +func_init4 (int64_t a, int64_t b) +{ + svint64_t temp = { a, b }; + return temp; +} + +/* +** func_init3: +** fmov s0, w2 +** fmov s0, s0 +** insr z0\.s, w1 +** insr z0\.s, w0 +** ret +*/ +svint32_t __attribute__ ((noipa)) +func_init3 (int32_t a, int32_t b, int32_t c) +{ + svint32_t temp = { a, b, c }; + return temp; +}