In determination of base block alignment we only examine a COMPONENT_REF
tree node at hand without ever checking if its ultimate alignment has
been reduced by the combined offset going back to the outermost object.
Consequently cases have been observed where quadword accesses have been
produced for a memory location referring a nested struct member only
aligned to the longword boundary, causing emulation to trigger.
Address this issue by recursing into COMPONENT_REF tree nodes until the
outermost one has been reached, which is supposed to be a MEM_REF one,
accumulating the offset as we go, fixing a commit
e0dae4da4c45 ("Alpha:
Also use tree information to get base block alignment") regression.
Bail out and refrain from using tree information for alignment if we end
up at something different or we are unable to calculate the offset at
any point.
gcc/
* config/alpha/alpha.cc
(alpha_get_mem_rtx_alignment_and_offset): Recurse into
COMPONENT_REF nodes.
gcc/testsuite/
* gcc.target/alpha/memcpy-nested-offset-long.c: New file.
* gcc.target/alpha/memcpy-nested-offset-quad.c: New file.
tree mem = MEM_EXPR (expr);
if (mem != NULL_TREE)
- switch (TREE_CODE (mem))
- {
- case MEM_REF:
- tree_offset = mem_ref_offset (mem).force_shwi ();
- tree_align = get_object_alignment (get_base_address (mem));
- break;
+ {
+ HOST_WIDE_INT comp_offset = 0;
- case COMPONENT_REF:
+ for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0))
{
tree byte_offset = component_ref_field_offset (mem);
tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
|| !poly_int_tree_p (byte_offset, &offset)
|| !tree_fits_shwi_p (bit_offset))
break;
- tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
+ comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
}
- tree_align = get_object_alignment (get_base_address (mem));
- break;
- default:
- break;
- }
+ if (TREE_CODE (mem) == MEM_REF)
+ {
+ tree_offset = comp_offset + mem_ref_offset (mem).force_shwi ();
+ tree_align = get_object_alignment (get_base_address (mem));
+ }
+ }
if (reg_align > mem_align)
{
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef unsigned int __attribute__ ((mode (DI))) int64_t;
+typedef unsigned int __attribute__ ((mode (SI))) int32_t;
+
+typedef union
+ {
+ int32_t l[8];
+ }
+val;
+
+typedef struct
+ {
+ int32_t l[2];
+ val v;
+ }
+tre;
+
+typedef struct
+ {
+ int32_t l[3];
+ tre t;
+ }
+due;
+
+typedef struct
+ {
+ val v;
+ int64_t q;
+ int32_t l[2];
+ due d;
+ }
+uno;
+
+void
+memcpy_nested_offset_long (uno *u)
+{
+ u->d.t.v = u->v;
+}
+
+/* Expect assembly such as:
+
+ ldq $4,0($16)
+ ldq $3,8($16)
+ ldq $2,16($16)
+ srl $4,32,$7
+ ldq $1,24($16)
+ srl $3,32,$6
+ stl $4,68($16)
+ srl $2,32,$5
+ stl $7,72($16)
+ srl $1,32,$4
+ stl $3,76($16)
+ stl $6,80($16)
+ stl $2,84($16)
+ stl $5,88($16)
+ stl $1,92($16)
+ stl $4,96($16)
+
+ that is with four quadword loads at offsets 0, 8, 16, 24 each and
+ eight longword stores at offsets 68, 72, 76, 80, 84, 88, 92, 96 each. */
+
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,68\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,76\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,84\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,92\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef unsigned int __attribute__ ((mode (DI))) int64_t;
+typedef unsigned int __attribute__ ((mode (SI))) int32_t;
+
+typedef union
+ {
+ int32_t l[8];
+ }
+val;
+
+typedef struct
+ {
+ int32_t l[2];
+ val v;
+ }
+tre;
+
+typedef struct
+ {
+ int32_t l[3];
+ tre t;
+ }
+due;
+
+typedef struct
+ {
+ val v;
+ int64_t q;
+ int32_t l[3];
+ due d;
+ }
+uno;
+
+void
+memcpy_nested_offset_quad (uno *u)
+{
+ u->d.t.v = u->v;
+}
+
+/* Expect assembly such as:
+
+ ldq $4,0($16)
+ ldq $3,8($16)
+ ldq $2,16($16)
+ ldq $1,24($16)
+ stq $4,72($16)
+ stq $3,80($16)
+ stq $2,88($16)
+ stq $1,96($16)
+
+ that is with four quadword loads at offsets 0, 8, 16, 24 each
+ and four quadword stores at offsets 72, 80, 88, 96 each. */
+
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */