{
words = bytes / 4;
- for (i = 0; i < words; ++i)
- data_regs[nregs + i] = gen_reg_rtx (SImode);
+ /* Load an even quantity of SImode data pieces only. */
+ unsigned int hwords = words / 2;
+ for (i = 0; i / 2 < hwords; ++i)
+ {
+ data_regs[nregs + i] = gen_reg_rtx (SImode);
+ emit_move_insn (data_regs[nregs + i],
+ adjust_address (orig_src, SImode, ofs + i * 4));
+ }
- for (i = 0; i < words; ++i)
- emit_move_insn (data_regs[nregs + i],
- adjust_address (orig_src, SImode, ofs + i * 4));
+ /* If we'll be using unaligned stores, merge data from pairs
+ of SImode registers into DImode registers so that we can
+ store it more efficiently via quadword unaligned stores. */
+ unsigned int j;
+ if (dst_align < 32)
+ for (i = 0, j = 0; i < words / 2; ++i, j = i * 2)
+ {
+ rtx hi = expand_simple_binop (DImode, ASHIFT,
+ data_regs[nregs + j + 1],
+ GEN_INT (32), NULL_RTX,
+ 1, OPTAB_WIDEN);
+ data_regs[nregs + i] = expand_simple_binop (DImode, IOR, hi,
+ data_regs[nregs + j],
+ NULL_RTX,
+ 1, OPTAB_WIDEN);
+ }
+ else
+ j = i;
- nregs += words;
+ /* Take care of any remaining odd trailing SImode data piece. */
+ if (j < words)
+ {
+ data_regs[nregs + i] = gen_reg_rtx (SImode);
+ emit_move_insn (data_regs[nregs + i],
+ adjust_address (orig_src, SImode, ofs + j * 4));
+ ++i;
+ }
+
+ nregs += i;
bytes -= words * 4;
ofs += words * 4;
}
}
/* Due to the above, this won't be aligned. */
- /* ??? If we have more than one of these, consider constructing full
- words in registers and using alpha_expand_unaligned_store_words. */
while (i < nregs && GET_MODE (data_regs[i]) == SImode)
{
alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
ofs += 4;
i++;
+ gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode);
}
if (dst_align >= 16)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned int aligned_src_si[17] = { [0 ... 16] = 0xeaebeced };
+unsigned int aligned_dst_si[17] = { [0 ... 16] = 0xdcdbdad9 };
+/* Copy 60 bytes (elements 1..15) from aligned_src_si to aligned_dst_si.
+   Both addresses are one element past the start of an unsigned int
+   array, and elements 0 and 16 of either array are not touched by the
+   copy.  The scan directives below expect 15 ldl and 15 stl
+   instructions and no ldq_u or stq_u.  */
+void
+memcpy_aligned_data_si (void)
+{
+ __builtin_memcpy (aligned_dst_si + 1, aligned_src_si + 1, 60);
+}
+
+/* { dg-final { scan-assembler-times "\\sldl\\s" 15 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s" 15 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned int unaligned_src_si[17] = { [0 ... 16] = 0xfefdfcfb };
+/* Copy 60 bytes (elements 1..15 of unaligned_src_si) to DST.  DST is a
+   plain `void *', so nothing in this function establishes its alignment.
+   The scan directives below expect the data to be loaded with 15 ldl
+   instructions and stored with stq_u only, never stl.  */
+void
+memcpy_unaligned_dst_si (void *dst)
+{
+ __builtin_memcpy (dst, unaligned_src_si + 1, 60);
+}
+
+/* { dg-final { scan-assembler-times "\\sldl\\s" 15 } } */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 10 } } */
+/* { dg-final { scan-assembler-not "\\sstl\\s" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mbwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "memcpy-si-unaligned-src.c"
+
+/* { dg-final { scan-assembler-times "\\sldbu\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 8 } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s" 14 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldl|stq_u)\\s" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mno-bwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned int unaligned_dst_si[17] = { [0 ... 16] = 0xc8c9cacb };
+/* Copy 60 bytes from SRC to elements 1..15 of unaligned_dst_si.  SRC is
+   a plain `const void *', so nothing in this function establishes its
+   alignment.  The scan directives below expect the data to be loaded
+   with ldq_u only, never ldl, and stored with 15 stl instructions.  */
+void
+memcpy_unaligned_src_si (const void *src)
+{
+ __builtin_memcpy (unaligned_dst_si + 1, src, 60);
+}
+
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 10 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s" 15 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldl|stq_u)\\s" } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-sources memcpy-si-aligned.c } */
+/* { dg-additional-sources memcpy-si-unaligned-src.c } */
+/* { dg-additional-sources memcpy-si-unaligned-dst.c } */
+/* { dg-options "" } */
+
+void memcpy_aligned_data_si (void);
+void memcpy_unaligned_dst_si (void *);
+void memcpy_unaligned_src_si (const void *);
+
+extern unsigned int aligned_src_si[];
+extern unsigned int aligned_dst_si[];
+extern unsigned int unaligned_src_si[];
+extern unsigned int unaligned_dst_si[];
+/* Run-time check chaining the three copy routines.  Elements 1..15 of
+   unaligned_src_si are filled so that each of the 60 bytes is distinct,
+   then copied unaligned_src_si -> aligned_src_si -> aligned_dst_si ->
+   unaligned_dst_si.  Returns 0 if elements 1..15 arrive intact and
+   elements 0 and 16 of all four arrays still hold their initial fill
+   value, 1 otherwise.  */
+int
+main (void)
+{
+ unsigned int v;
+ int i;
+
+ for (i = 1, v = 0x04030201; i < 16; i++, v += 0x04040404)
+ unaligned_src_si[i] = v;
+ asm ("" : : : "memory"); /* Compiler-level memory barrier.  */
+ memcpy_unaligned_dst_si (aligned_src_si + 1); /* Into aligned_src_si.  */
+ asm ("" : : : "memory");
+ memcpy_aligned_data_si (); /* Into aligned_dst_si.  */
+ asm ("" : : : "memory");
+ memcpy_unaligned_src_si (aligned_dst_si + 1); /* Into unaligned_dst_si.  */
+ asm ("" : : : "memory");
+ for (i = 1, v = 0x04030201; i < 16; i++, v += 0x04040404)
+ if (unaligned_dst_si[i] != v) /* Same sequence as the fill above.  */
+ return 1;
+ if (unaligned_src_si[0] != 0xfefdfcfb) /* Words outside the copy intact?  */
+ return 1;
+ if (unaligned_src_si[16] != 0xfefdfcfb)
+ return 1;
+ if (aligned_src_si[0] != 0xeaebeced)
+ return 1;
+ if (aligned_src_si[16] != 0xeaebeced)
+ return 1;
+ if (aligned_dst_si[0] != 0xdcdbdad9)
+ return 1;
+ if (aligned_dst_si[16] != 0xdcdbdad9)
+ return 1;
+ if (unaligned_dst_si[0] != 0xc8c9cacb)
+ return 1;
+ if (unaligned_dst_si[16] != 0xc8c9cacb)
+ return 1;
+ return 0;
+}