unsigned inc = GET_MODE_SIZE (mode);
unsigned offset = 0;
+ /* If val is a constant, then build a new constant value duplicating
+ the byte across to the size of stores we might do.
+ e.g. if val is 0xab and we can store in 4-byte chunks, build
+ 0xabababab and use that to do the memset.
+ If val is not a constant, then by constraint it is a QImode register
+ and we similarly duplicate the byte across. */
+ rtx src;
+ if (CONST_INT_P (val))
+ {
+ unsigned HOST_WIDE_INT tmp = UINTVAL (val) & 0xff;
+ /* Need src in the proper mode. */
+ switch (mode)
+ {
+ case DImode:
+ src = gen_rtx_CONST_INT (DImode, tmp * 0x0101010101010101);
+ break;
+ case SImode:
+ src = gen_rtx_CONST_INT (SImode, tmp * 0x01010101);
+ break;
+ case HImode:
+ src = gen_rtx_CONST_INT (HImode, tmp * 0x0101);
+ break;
+ default:
+ src = val;
+ break;
+ }
+ }
+ else
+ {
+ /* VAL is a subreg:QI (reg:DI N).
+ Copy that byte to fill the whole register. */
+ src = gen_reg_rtx (mode);
+ emit_move_insn (src, gen_rtx_ZERO_EXTEND (mode, val));
+
+ /* We can fill the whole register with copies of the byte by multiplying
+ by 0x010101...
+ For DImode this requires a tmp reg with lldw, but only if we will
+ actually do nonzero iterations of stxdw. */
+ if (mode < DImode || iters == 0)
+ emit_move_insn (src, gen_rtx_MULT (mode, src, GEN_INT (0x01010101)));
+ else
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, GEN_INT (0x0101010101010101));
+ emit_move_insn (src, gen_rtx_MULT (mode, src, tmp));
+ }
+ }
+
for (unsigned int i = 0; i < iters; i++)
{
- emit_move_insn (adjust_address (dst, mode, offset), val);
+ emit_move_insn (adjust_address (dst, mode, offset), src);
offset += inc;
}
if (remainder & 4)
{
- emit_move_insn (adjust_address (dst, SImode, offset), val);
+ emit_move_insn (adjust_address (dst, SImode, offset),
+ REG_P (src)
+ ? simplify_gen_subreg (SImode, src, mode, 0)
+ : src);
offset += 4;
remainder -= 4;
}
if (remainder & 2)
{
- emit_move_insn (adjust_address (dst, HImode, offset), val);
+ emit_move_insn (adjust_address (dst, HImode, offset),
+ REG_P (src)
+ ? simplify_gen_subreg (HImode, src, mode, 0)
+ : src);
offset += 2;
remainder -= 2;
}
if (remainder & 1)
- emit_move_insn (adjust_address (dst, QImode, offset), val);
+ emit_move_insn (adjust_address (dst, QImode, offset),
+ REG_P (src)
+ ? simplify_gen_subreg (QImode, src, mode, 0)
+ : src);
return true;
}
--- /dev/null
+/* Test that inline memset expansion properly duplicates the byte value
+ across the bytes to fill. PR target/122139. */
+/* { dg-do compile } */
+/* { dg-options "-O1 -masm=normal" } */
+
+#define SIZE 63
+
+unsigned char cdata[SIZE];
+unsigned short sdata[SIZE / 2 + 1];
+unsigned int idata[SIZE / 4 + 1];
+unsigned long ldata[SIZE / 8 + 1];
+
+void
+a (void)
+{
+ __builtin_memset (cdata, 0x54, SIZE);
+}
+/* 0x54=84 */
+/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,84" 63 } } */
+
+void
+b (void)
+{
+ __builtin_memset (sdata, 0x7a, SIZE);
+}
+
+/* 0x7a=122, 0x7a7a=31354 */
+/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,31354" 31 } } */
+/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,122" 1 } } */
+
+void
+c (void)
+{
+ __builtin_memset (idata, 0x23, SIZE);
+}
+
+/* 0x23=35, 0x2323=8995, 0x23232323=589505315 */
+/* { dg-final { scan-assembler-times "\[\t \]stw\[^\r\n\]+,589505315" 15 } } */
+/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,8995" 1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,35" 1 } } */
+
+void
+d (void)
+{
+ __builtin_memset (ldata, 0xcb, SIZE);
+}
+
+/* 0xcbcbcbcb_cbcbcbcb = -3761688987579986997,
+ 0xcbcbcbcb = -875836469
+ 0xcbcb = -13365
+ 0xcb = -53 */
+/* { dg-final { scan-assembler-times "lddw\t%r.,-3761688987579986997"}} */
+/* { dg-final { scan-assembler-times "stxdw" 7 } } */
+/* { dg-final { scan-assembler-times "\[\t \]stw\[^\r\n\]+,-875836469" 1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,-13365" 1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,-53" 1 } } */
--- /dev/null
+/* Test that inline memset expansion properly duplicates the byte value
+ across the bytes to fill for non-const value. PR target/122139. */
+/* { dg-do compile } */
+/* { dg-options "-O1 -masm=normal" } */
+
+#define SIZE 63
+
+unsigned char cdata[SIZE];
+unsigned short sdata[SIZE / 2 + 1];
+unsigned int idata[SIZE / 4 + 1];
+unsigned long ldata[SIZE / 8 + 1];
+
+void
+c (unsigned char byte)
+{
+ __builtin_memset (idata, byte, SIZE);
+}
+
+/* Hard to verify for non-const value. Look for the mul by 0x01010101
+ and the proper number of stores... */
+/* { dg-final { scan-assembler "mul32\[\t \]%r.,16843009" } } */
+/* { dg-final { scan-assembler-times "stxw" 15 } } */
+/* { dg-final { scan-assembler-times "stxh" 1 } } */
+/* { dg-final { scan-assembler-times "stxb" 1 } } */