rtx max_size_exp, rtx probable_max_size_exp,
bool issetmem)
{
+ if (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES)
+ {
+ /* Expand bounded memset and memcpy as memmove if misaligned moves
+ are preferred. Since
+
+ commit b41f96465190751561f6909e858604ceab00595b
+ Author: H.J. Lu <hjl.tools@gmail.com>
+ Date: Mon Oct 20 16:14:34 2025 +0800
+
+ x86-64: Inline memmove with overlapping unaligned loads and stores.
+
+ inlines memmove with overlapping unaligned loads and stores, the
+ memmove expansion needs fewer branches and memory moves than the
+ regular memset and memcpy inlining. */
+ rtx operands[9];
+ operands[0] = dst;
+ operands[1] = issetmem ? val_exp : src;
+ operands[2] = count_exp;
+ operands[3] = align_exp;
+ operands[4] = expected_align_exp;
+ operands[5] = expected_size_exp;
+ operands[6] = min_size_exp;
+ operands[7] = max_size_exp;
+ operands[8] = probable_max_size_exp;
+ if (ix86_expand_set_or_movmem (operands, !issetmem, issetmem))
+ return true;
+ }
+
rtx destreg;
rtx srcreg = NULL;
rtx_code_label *label = NULL;
return true;
}
+/* Slots of a MEMSET_VALS array.  Each enumerator is the index of one
+   form of the memset value.  */
+enum memset_val_kind
+{
+  /* The value rtx in QImode.  */
+  memset_val_byte,
+  /* The value rtx in word_mode.  */
+  memset_val_word,
+  /* The value rtx in QI vector mode.  */
+  memset_val_vector,
+  /* Number of slots above; not a value kind itself.  */
+  memset_val_max
+};
+
+/* Return the memset value as an rtx in MODE, taken or derived from
+   MEMSET_VALS (indexed by enum memset_val_kind).
+
+   QImode and word_mode are answered straight from their own slots.
+   For any other mode, an all-zeros or all-ones byte value yields the
+   matching constant in MODE.  Otherwise an integer vector MODE is
+   served from the vector slot (returned as is when its mode already
+   is MODE) and HImode/SImode from the word slot, in both cases
+   wrapped in a SUBREG of MODE at byte offset 0.  */
+
+static rtx
+ix86_expand_memset_val (rtx *memset_vals, machine_mode mode)
+{
+  if (mode == QImode)
+    return memset_vals[memset_val_byte];
+  if (mode == word_mode)
+    return memset_vals[memset_val_word];
+
+  /* The byte value alone tells whether the fill is all zeros or all
+     ones, so no wider slot has to be inspected for that.  */
+  rtx fill = memset_vals[memset_val_byte];
+  if (fill == const0_rtx)
+    return CONST0_RTX (mode);
+  if (fill == constm1_rtx)
+    return CONSTM1_RTX (mode);
+
+  /* Select the wider slot that MODE is carved out of.  */
+  rtx wide;
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+    {
+      wide = memset_vals[memset_val_vector];
+      if (GET_MODE (wide) == mode)
+        return wide;
+    }
+  else
+    {
+      /* Only the sub-word integer modes are expected here.  */
+      gcc_assert (mode == HImode || mode == SImode);
+      wide = memset_vals[memset_val_word];
+    }
+  return gen_rtx_SUBREG (mode, wide, 0);
+}
+
/* Expand memmove of size with MOVES * mode size and MOVES <= 4. If
FORWARD is true, copy forward. Otherwise copy backward. */
static void
-ix86_expand_n_move_movmem (rtx destmem, rtx srcmem, machine_mode mode,
- unsigned int moves, bool forward)
+ix86_expand_n_move_set_or_movmem (rtx destmem, rtx srcmem,
+ rtx *memset_vals, machine_mode mode,
+ unsigned int moves, bool forward)
{
gcc_assert (moves <= 4);
unsigned int i;
rtx tmp[8];
- for (i = 0; i < moves; i++)
- tmp[i] = gen_reg_rtx (mode);
-
rtx step;
if (forward)
step = GEN_INT (GET_MODE_SIZE (mode));
else
step = GEN_INT (-GET_MODE_SIZE (mode));
- /* Load MOVES. */
- for (i = 0; i < moves - 1; i++)
+ if (memset_vals)
{
+ /* Expand memset. */
+ rtx val = ix86_expand_memset_val (memset_vals, mode);
+ for (i = 0; i < moves; i++)
+ tmp[i] = val;
+ }
+ else
+ {
+ /* Expand memmove. */
+ for (i = 0; i < moves; i++)
+ tmp[i] = gen_reg_rtx (mode);
+
+ /* Load MOVES. */
+ for (i = 0; i < moves - 1; i++)
+ {
+ emit_move_insn (tmp[i], srcmem);
+ srcmem = offset_address (srcmem, step, GET_MODE_SIZE (mode));
+ }
emit_move_insn (tmp[i], srcmem);
- srcmem = offset_address (srcmem, step, GET_MODE_SIZE (mode));
}
- emit_move_insn (tmp[i], srcmem);
/* Store MOVES. */
for (i = 0; i < moves - 1; i++)
MOVES >= 2 and MOVES <= 8. */
static void
-ix86_expand_n_overlapping_move_movmem (rtx dst, rtx src, rtx destreg,
- rtx srcreg, rtx count_exp,
- machine_mode mode,
- unsigned int moves)
+ix86_expand_n_overlapping_move_set_or_movmem (rtx dst, rtx src,
+ rtx *memset_vals,
+ rtx destreg, rtx srcreg,
+ rtx count_exp,
+ machine_mode mode,
+ unsigned int moves)
{
gcc_assert (moves >= 2 && moves <= 8 && (moves & 1) == 0);
unsigned int i, j;
rtx tmp[8];
- for (i = 0; i < moves; i++)
- tmp[i] = gen_reg_rtx (mode);
+ if (memset_vals)
+ {
+ /* Expand memset. */
+ rtx val = ix86_expand_memset_val (memset_vals, mode);
+ for (i = 0; i < moves; i++)
+ tmp[i] = val;
+ }
+ else
+ {
+ /* Expand memmove. */
+ for (i = 0; i < moves; i++)
+ tmp[i] = gen_reg_rtx (mode);
- rtx base_srcmem = change_address (src, mode, srcreg);
+ rtx base_srcmem = change_address (src, mode, srcreg);
- /* Load the first half. */
- rtx srcmem = base_srcmem;
- for (i = 0; i < half_moves - 1; i++)
- {
+ /* Load the first half. */
+ rtx srcmem = base_srcmem;
+ for (i = 0; i < half_moves - 1; i++)
+ {
+ emit_move_insn (tmp[i], srcmem);
+ srcmem = offset_address (srcmem,
+ GEN_INT (GET_MODE_SIZE (mode)),
+ GET_MODE_SIZE (mode));
+ }
emit_move_insn (tmp[i], srcmem);
- srcmem = offset_address (srcmem,
- GEN_INT (GET_MODE_SIZE (mode)),
- GET_MODE_SIZE (mode));
- }
- emit_move_insn (tmp[i], srcmem);
- /* Load the second half. */
- srcmem = offset_address (base_srcmem, count_exp, 1);
- srcmem = offset_address (srcmem,
- GEN_INT (-GET_MODE_SIZE (mode)),
- GET_MODE_SIZE (mode));
- for (j = half_moves, i = 0; i < half_moves - 1; i++, j++)
- {
- emit_move_insn (tmp[j], srcmem);
+ /* Load the second half. */
+ srcmem = offset_address (base_srcmem, count_exp, 1);
srcmem = offset_address (srcmem,
GEN_INT (-GET_MODE_SIZE (mode)),
GET_MODE_SIZE (mode));
+ for (j = half_moves, i = 0; i < half_moves - 1; i++, j++)
+ {
+ emit_move_insn (tmp[j], srcmem);
+ srcmem = offset_address (srcmem,
+ GEN_INT (-GET_MODE_SIZE (mode)),
+ GET_MODE_SIZE (mode));
+ }
+ emit_move_insn (tmp[j], srcmem);
}
- emit_move_insn (tmp[j], srcmem);
rtx base_destmem = change_address (dst, mode, destreg);
/* Expand memmove of size < mode size which is <= 64. */
static void
-ix86_expand_less_move_movmem (rtx dst, rtx src, rtx destreg,
- rtx srcreg, rtx count_exp,
- unsigned HOST_WIDE_INT min_size,
- machine_mode mode,
- rtx_code_label *done_label)
+ix86_expand_less_move_set_or_movmem (rtx dst, rtx src, rtx *memset_vals,
+ rtx destreg, rtx srcreg,
+ rtx count_exp,
+ unsigned HOST_WIDE_INT min_size,
+ machine_mode mode,
+ rtx_code_label *done_label)
{
bool skip = false;
machine_mode count_mode = counter_mode (count_exp);
profile_probability::unlikely ());
/* Move 1 byte. */
- rtx tmp0 = gen_reg_rtx (QImode);
- rtx srcmem = change_address (src, QImode, srcreg);
- emit_move_insn (tmp0, srcmem);
+ rtx tmp0;
+ /* Use the value rtx in QImode for memset. */
+ if (memset_vals)
+ tmp0 = memset_vals[memset_val_byte];
+ else
+ {
+ tmp0 = gen_reg_rtx (QImode);
+ rtx srcmem = change_address (src, QImode, srcreg);
+ emit_move_insn (tmp0, srcmem);
+ }
rtx destmem = change_address (dst, QImode, destreg);
emit_move_insn (destmem, tmp0);
emit_barrier ();
}
- if (between_32_63_label)
- {
- emit_label (between_32_63_label);
- ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg,
- count_exp, OImode, 2);
- emit_jump_insn (gen_jump (done_label));
- emit_barrier ();
- }
-
- if (between_16_31_label)
- {
- emit_label (between_16_31_label);
- ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg,
- count_exp, TImode, 2);
- emit_jump_insn (gen_jump (done_label));
- emit_barrier ();
- }
-
- if (between_8_15_label)
- {
- emit_label (between_8_15_label);
- ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg,
- count_exp, DImode, 2);
- emit_jump_insn (gen_jump (done_label));
- emit_barrier ();
- }
-
- if (between_4_7_label)
- {
- emit_label (between_4_7_label);
- ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg,
- count_exp, SImode, 2);
- emit_jump_insn (gen_jump (done_label));
- emit_barrier ();
- }
-
- if (between_2_3_label)
- {
- emit_label (between_2_3_label);
- ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg,
- count_exp, HImode, 2);
- emit_jump_insn (gen_jump (done_label));
- emit_barrier ();
- }
+ /* For each size band, memset uses a QI-vector mode above a word so it
+ can broadcast the fill value, while memmove uses the same-size
+ scalar integer mode; at and below a word both use the scalar
+ integer mode. */
+ struct {
+ rtx_code_label *label;
+ machine_mode set_mode;
+ machine_mode move_mode;
+ } bands[] = {
+ { between_32_63_label, V32QImode, OImode },
+ { between_16_31_label, V16QImode, TImode },
+ { between_8_15_label, DImode, DImode },
+ { between_4_7_label, SImode, SImode },
+ { between_2_3_label, HImode, HImode },
+ };
+
+ for (auto &band : bands)
+ if (band.label)
+ {
+ emit_label (band.label);
+ machine_mode bmode = memset_vals ? band.set_mode : band.move_mode;
+ ix86_expand_n_overlapping_move_set_or_movmem (dst, src,
+ memset_vals,
+ destreg, srcreg,
+ count_exp, bmode,
+ 2);
+ emit_jump_insn (gen_jump (done_label));
+ emit_barrier ();
+ }
}
/* Expand movmem with overlapping unaligned loads and stores:
*/
bool
-ix86_expand_movmem (rtx operands[])
+ix86_expand_set_or_movmem (rtx operands[], bool iscpymem, bool issetmem)
{
/* Since there are much less registers available in 32-bit mode, don't
inline movmem in 32-bit mode. */
return false;
rtx dst = operands[0];
- rtx src = operands[1];
+ rtx src, memset_val_exp;
+ if (issetmem)
+ {
+ src = nullptr;
+ memset_val_exp = operands[1];
+ }
+ else
+ {
+ src = operands[1];
+ memset_val_exp = nullptr;
+ }
rtx count_exp = operands[2];
rtx expected_size_exp = operands[5];
rtx min_size_exp = operands[6];
+ rtx max_size_exp = operands[7];
rtx probable_max_size_exp = operands[8];
unsigned HOST_WIDE_INT count = HOST_WIDE_INT_0U;
HOST_WIDE_INT expected_size = HOST_WIDE_INT_M1U;
unsigned HOST_WIDE_INT min_size = HOST_WIDE_INT_0U;
+ unsigned HOST_WIDE_INT max_size = HOST_WIDE_INT_M1U;
unsigned HOST_WIDE_INT probable_max_size = HOST_WIDE_INT_M1U;
if (CONST_INT_P (count_exp))
{
- min_size = probable_max_size = count = expected_size
+ min_size = max_size = probable_max_size = count = expected_size
= INTVAL (count_exp);
/* When COUNT is 0, there is nothing to do. */
if (!count)
{
if (min_size_exp)
min_size = INTVAL (min_size_exp);
+ if (max_size_exp)
+ max_size = INTVAL (max_size_exp);
if (probable_max_size_exp)
probable_max_size = INTVAL (probable_max_size_exp);
if (CONST_INT_P (expected_size_exp))
return false;
addr_space_t dst_as = MEM_ADDR_SPACE (dst);
- addr_space_t src_as = MEM_ADDR_SPACE (src);
+ addr_space_t src_as = (issetmem
+ ? ADDR_SPACE_GENERIC
+ : MEM_ADDR_SPACE (src));
int dynamic_check;
bool noalign;
enum stringop_alg alg = decide_alg (count, expected_size, min_size,
- probable_max_size, false, false,
+ probable_max_size, issetmem,
+ (issetmem
+ && memset_val_exp == const0_rtx),
dst_as, src_as, &dynamic_check,
&noalign, false);
if (alg == libcall)
return false;
+ /* Expand memcpy and memset like memmove only for bounded size. */
+ if (iscpymem || issetmem)
+ {
+ unsigned HOST_WIDE_INT unbounded
+ = GET_MODE_MASK (counter_mode (count_exp));
+ if (count != 0 /* Fixed size. */
+ || max_size == 0 /* Unbounded size. */
+ || max_size == unbounded) /* Unbounded size. */
+ return false;
+ }
+
rtx destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
- rtx srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
+ rtx srcreg = (issetmem
+ ? nullptr
+ : ix86_copy_addr_to_reg (XEXP (src, 0)));
unsigned int move_max = MOVE_MAX;
machine_mode mode = smallest_int_mode_for_size
mode))
return true;
+ rtx memset_vals[memset_val_max];
+ rtx *memset_vals_p;
+ if (issetmem)
+ {
+ /* Use vector mode if MODE size > word size. */
+ unsigned int size = GET_MODE_SIZE (mode);
+ poly_uint64 nunits;
+ if (size > UNITS_PER_WORD)
+ {
+ nunits = size / GET_MODE_SIZE (QImode);
+ mode = mode_for_vector (QImode, nunits).require ();
+ }
+
+ /* Populate MEMSET_VALS to expand memset. */
+ rtx val_word;
+ memset_vals[memset_val_byte] = memset_val_exp;
+ if (memset_val_exp == const0_rtx || memset_val_exp == constm1_rtx)
+ val_word = memset_val_exp;
+ else
+ val_word = promote_duplicated_reg (word_mode, memset_val_exp);
+ memset_vals[memset_val_word] = val_word;
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ {
+ if (memset_val_exp == const0_rtx)
+ memset_vals[memset_val_vector] = CONST0_RTX (mode);
+ else if (memset_val_exp == constm1_rtx)
+ memset_vals[memset_val_vector] = CONSTM1_RTX (mode);
+ else
+ {
+ /* Use the vector mode based on WORD_MODE to avoid extra
+ GPR moves. */
+ nunits = size / GET_MODE_SIZE (word_mode);
+ machine_mode vector_mode
+ = mode_for_vector (word_mode, nunits).require ();
+ rtx vector = promote_duplicated_reg (vector_mode,
+ val_word);
+ memset_vals[memset_val_vector]
+ = convert_to_mode (mode, vector, 1);
+ }
+ }
+ else
+ memset_vals[memset_val_vector] = nullptr;
+ memset_vals_p = memset_vals;
+ }
+ else
+ memset_vals_p = nullptr;
+
rtx_code_label *done_label = gen_label_rtx ();
rtx_code_label *less_vec_label = nullptr;
if (min_size == 0 || min_size <= 2 * move_max)
{
/* Size >= MOVE_MAX and size <= 2 * MOVE_MAX. */
- ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg,
- count_exp, mode, 2);
+ ix86_expand_n_overlapping_move_set_or_movmem (dst, src,
+ memset_vals_p,
+ destreg, srcreg,
+ count_exp, mode, 2);
emit_jump_insn (gen_jump (done_label));
emit_barrier ();
}
{
/* Size < MOVE_MAX. */
emit_label (less_vec_label);
- ix86_expand_less_move_movmem (dst, src, destreg, srcreg,
- count_exp, min_size, mode,
- done_label);
+ ix86_expand_less_move_set_or_movmem (dst, src, memset_vals_p,
+ destreg, srcreg, count_exp,
+ min_size, mode, done_label);
emit_jump_insn (gen_jump (done_label));
emit_barrier ();
}
if (probable_max_size == 0 || probable_max_size > 4 * move_max)
{
/* Size > 4 * MOVE_MAX and size <= 8 * MOVE_MAX. */
- ix86_expand_n_overlapping_move_movmem (dst, src, destreg,
- srcreg, count_exp,
- mode, 8);
+ ix86_expand_n_overlapping_move_set_or_movmem (dst, src,
+ memset_vals_p,
+ destreg, srcreg,
+ count_exp, mode,
+ 8);
emit_jump_insn (gen_jump (done_label));
emit_barrier ();
}
{
/* Size > 2 * MOVE_MAX and size <= 4 * MOVE_MAX. */
emit_label (last_4x_vec_label);
- ix86_expand_n_overlapping_move_movmem (dst, src, destreg,
- srcreg, count_exp,
- mode, 4);
+ ix86_expand_n_overlapping_move_set_or_movmem (dst, src,
+ memset_vals_p,
+ destreg, srcreg,
+ count_exp, mode,
+ 4);
emit_jump_insn (gen_jump (done_label));
emit_barrier ();
}
rtx loop_count = gen_reg_rtx (count_mode);
emit_move_insn (loop_count, count_exp);
- /* Jump to MORE_8X_VEC_BACKWARD_LABEL if source address is
- lower than destination address. */
- rtx_code_label *more_8x_vec_backward_label = gen_label_rtx ();
- emit_cmp_and_jump_insns (srcreg, destreg, LTU, nullptr,
- GET_MODE (destreg), 1,
- more_8x_vec_backward_label);
-
- /* Skip if source == destination which is less common. */
- emit_cmp_and_jump_insns (srcreg, destreg, EQ, nullptr,
- GET_MODE (destreg), 1, done_label,
- profile_probability::unlikely ());
-
- rtx base_destreg = gen_reg_rtx (GET_MODE (destreg));
- emit_move_insn (base_destreg, destreg);
-
- /* Load the last 4 * MOVE_MAX. */
+ rtx_code_label *more_8x_vec_backward_label;
+ rtx base_destreg;
+ rtx srcmem;
rtx regs[4];
- ix86_expand_load_movmem (src, srcreg, count_exp, mode,
- ARRAY_SIZE (regs), regs, true);
+ if (iscpymem || issetmem)
+ {
+ /* Always store forward for memcpy and memset. */
+ more_8x_vec_backward_label = nullptr;
+ if (iscpymem)
+ {
+ /* Load the last 4 * MOVE_MAX for memcpy. */
+ ix86_expand_load_movmem (src, srcreg, count_exp, mode,
+ ARRAY_SIZE (regs), regs,
+ true);
+ srcmem = change_address (src, mode, srcreg);
+ }
+ else
+ {
+ /* Fill REGS with MEMSET_VALS for memset. */
+ rtx val = ix86_expand_memset_val (memset_vals, mode);
+ for (unsigned int i = 0; i < 4; i++)
+ regs[i] = val;
+ srcmem = nullptr;
+ }
+ base_destreg = gen_reg_rtx (GET_MODE (destreg));
+ emit_move_insn (base_destreg, destreg);
+ }
+ else
+ {
+ /* Jump to MORE_8X_VEC_BACKWARD_LABEL if source address is
+ lower than destination address. */
+ more_8x_vec_backward_label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (srcreg, destreg, LTU, nullptr,
+ GET_MODE (destreg), 1,
+ more_8x_vec_backward_label);
+
+ /* Skip if source == destination which is less common. */
+ emit_cmp_and_jump_insns (srcreg, destreg, EQ, nullptr,
+ GET_MODE (destreg), 1, done_label,
+ profile_probability::unlikely ());
+
+ base_destreg = gen_reg_rtx (GET_MODE (destreg));
+ emit_move_insn (base_destreg, destreg);
+
+ /* Load the last 4 * MOVE_MAX. */
+ ix86_expand_load_movmem (src, srcreg, count_exp, mode,
+ ARRAY_SIZE (regs), regs, true);
+
+ srcmem = change_address (src, mode, srcreg);
+ }
- rtx srcmem = change_address (src, mode, srcreg);
rtx destmem = change_address (dst, mode, destreg);
/* Copy forward with a 4 * MOVE_MAX loop. */
rtx_code_label *loop_4x_vec_forward_label = gen_label_rtx ();
emit_label (loop_4x_vec_forward_label);
- ix86_expand_n_move_movmem (destmem, srcmem, mode, 4, true);
+ ix86_expand_n_move_set_or_movmem (destmem, srcmem,
+ memset_vals_p, mode, 4,
+ true);
rtx tmp;
rtx delta = GEN_INT (4 * MOVE_MAX);
OPTAB_DIRECT);
if (tmp != destreg)
emit_move_insn (destreg, tmp);
- tmp = expand_simple_binop (GET_MODE (srcreg), PLUS, srcreg,
- delta, nullptr, 1, OPTAB_DIRECT);
- if (tmp != srcreg)
- emit_move_insn (srcreg, tmp);
+ if (!issetmem)
+ {
+ tmp = expand_simple_binop (GET_MODE (srcreg), PLUS,
+ srcreg, delta, nullptr, 1,
+ OPTAB_DIRECT);
+ if (tmp != srcreg)
+ emit_move_insn (srcreg, tmp);
+ }
/* Stop if LOOP_EXP <= 4 * MOVE_MAX. */
emit_cmp_and_jump_insns (loop_count, delta, GTU, nullptr,
emit_jump_insn (gen_jump (done_label));
emit_barrier ();
- /* Copy backward with a 4 * MOVE_MAX loop. */
- emit_label (more_8x_vec_backward_label);
-
- base_destreg = gen_reg_rtx (GET_MODE (destreg));
- emit_move_insn (base_destreg, destreg);
-
- /* Load the first 4 * MOVE_MAX. */
- ix86_expand_load_movmem (src, srcreg, count_exp, mode,
- ARRAY_SIZE (regs), regs, false);
-
- /* Increment DESTREG and SRCREG by COUNT_EXP. */
- tmp = expand_simple_binop (GET_MODE (destreg), PLUS,
- destreg, count_exp, nullptr, 1,
- OPTAB_DIRECT);
- if (tmp != destreg)
- emit_move_insn (destreg, tmp);
- tmp = expand_simple_binop (GET_MODE (srcreg), PLUS, srcreg,
- count_exp, nullptr, 1, OPTAB_DIRECT);
- if (tmp != srcreg)
- emit_move_insn (srcreg, tmp);
-
- srcmem = change_address (src, mode, srcreg);
- destmem = change_address (dst, mode, destreg);
- rtx step = GEN_INT (-GET_MODE_SIZE (mode));
- srcmem = offset_address (srcmem, step, GET_MODE_SIZE (mode));
- destmem = offset_address (destmem, step, GET_MODE_SIZE (mode));
-
- rtx_code_label *loop_4x_vec_backward_label = gen_label_rtx ();
- emit_label (loop_4x_vec_backward_label);
-
- ix86_expand_n_move_movmem (destmem, srcmem, mode, 4, false);
-
- /* Decrement LOOP_COUNT by 4 * MOVE_MAX. */
- tmp = expand_simple_binop (GET_MODE (loop_count), MINUS,
- loop_count, delta, nullptr, 1,
- OPTAB_DIRECT);
- if (tmp != loop_count)
- emit_move_insn (loop_count, tmp);
-
- /* Decrement DESTREG and SRCREG by 4 * MOVE_MAX. */
- tmp = expand_simple_binop (GET_MODE (destreg), MINUS,
- destreg, delta, nullptr, 1,
- OPTAB_DIRECT);
- if (tmp != destreg)
- emit_move_insn (destreg, tmp);
- tmp = expand_simple_binop (GET_MODE (srcreg), MINUS, srcreg,
- delta, nullptr, 1, OPTAB_DIRECT);
- if (tmp != srcreg)
- emit_move_insn (srcreg, tmp);
-
- /* Stop if LOOP_EXP <= 4 * MOVE_MAX. */
- emit_cmp_and_jump_insns (loop_count, delta, GTU, nullptr,
- GET_MODE (loop_count), 1,
- loop_4x_vec_backward_label);
-
- /* Store the first 4 * MOVE_MAX. */
- ix86_expand_store_movmem (dst, base_destreg, count_exp, mode,
- ARRAY_SIZE (regs), regs, false);
-
- emit_jump_insn (gen_jump (done_label));
- emit_barrier ();
+ if (more_8x_vec_backward_label)
+ {
+ /* Copy backward with a 4 * MOVE_MAX loop. */
+ emit_label (more_8x_vec_backward_label);
+
+ base_destreg = gen_reg_rtx (GET_MODE (destreg));
+ emit_move_insn (base_destreg, destreg);
+
+ /* Load the first 4 * MOVE_MAX. */
+ ix86_expand_load_movmem (src, srcreg, count_exp, mode,
+ ARRAY_SIZE (regs), regs, false);
+
+ /* Increment DESTREG and SRCREG by COUNT_EXP. */
+ tmp = expand_simple_binop (GET_MODE (destreg), PLUS,
+ destreg, count_exp, nullptr, 1,
+ OPTAB_DIRECT);
+ if (tmp != destreg)
+ emit_move_insn (destreg, tmp);
+ tmp = expand_simple_binop (GET_MODE (srcreg), PLUS, srcreg,
+ count_exp, nullptr, 1,
+ OPTAB_DIRECT);
+ if (tmp != srcreg)
+ emit_move_insn (srcreg, tmp);
+
+ srcmem = change_address (src, mode, srcreg);
+ destmem = change_address (dst, mode, destreg);
+ rtx step = GEN_INT (-GET_MODE_SIZE (mode));
+ srcmem = offset_address (srcmem, step,
+ GET_MODE_SIZE (mode));
+ destmem = offset_address (destmem, step,
+ GET_MODE_SIZE (mode));
+
+ rtx_code_label *loop_4x_vec_backward_label
+ = gen_label_rtx ();
+ emit_label (loop_4x_vec_backward_label);
+
+ ix86_expand_n_move_set_or_movmem (destmem, srcmem,
+ memset_vals_p, mode, 4,
+ false);
+
+ /* Decrement LOOP_COUNT by 4 * MOVE_MAX. */
+ tmp = expand_simple_binop (GET_MODE (loop_count), MINUS,
+ loop_count, delta, nullptr, 1,
+ OPTAB_DIRECT);
+ if (tmp != loop_count)
+ emit_move_insn (loop_count, tmp);
+
+ /* Decrement DESTREG and SRCREG by 4 * MOVE_MAX. */
+ tmp = expand_simple_binop (GET_MODE (destreg), MINUS,
+ destreg, delta, nullptr, 1,
+ OPTAB_DIRECT);
+ if (tmp != destreg)
+ emit_move_insn (destreg, tmp);
+ tmp = expand_simple_binop (GET_MODE (srcreg), MINUS,
+ srcreg, delta, nullptr, 1,
+ OPTAB_DIRECT);
+ if (tmp != srcreg)
+ emit_move_insn (srcreg, tmp);
+
+ /* Stop if LOOP_COUNT <= 4 * MOVE_MAX. */
+ emit_cmp_and_jump_insns (loop_count, delta, GTU, nullptr,
+ GET_MODE (loop_count), 1,
+ loop_4x_vec_backward_label);
+
+ /* Store the first 4 * MOVE_MAX. */
+ ix86_expand_store_movmem (dst, base_destreg, count_exp,
+ mode, ARRAY_SIZE (regs), regs,
+ false);
+
+ emit_jump_insn (gen_jump (done_label));
+ emit_barrier ();
+ }
}
}
extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx,
rtx, rtx, rtx, rtx, bool);
-extern bool ix86_expand_movmem (rtx[]);
+extern bool ix86_expand_set_or_movmem (rtx[], bool, bool);
extern bool ix86_expand_cmpstrn_or_cmpmem (rtx, rtx, rtx, rtx, rtx, bool);
extern enum reg_class ix86_insn_base_reg_class (rtx_insn *);
(use (match_operand:SI 8 ""))]
""
{
- if (ix86_expand_movmem (operands))
+ if (ix86_expand_set_or_movmem (operands, false, false))
DONE;
FAIL;
})
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** movdqu \(%rsi\), %xmm0
+** movups %xmm0, \(%rdi\)
+** movdqu 16\(%rsi\), %xmm0
+** movups %xmm0, 16\(%rdi\)
+** movdqu 32\(%rsi\), %xmm0
+** movups %xmm0, 32\(%rdi\)
+** movdqu 48\(%rsi\), %xmm0
+** movups %xmm0, 48\(%rdi\)
+** movdqu 64\(%rsi\), %xmm0
+** movups %xmm0, 64\(%rdi\)
+** movdqu 80\(%rsi\), %xmm0
+** movups %xmm0, 80\(%rdi\)
+** movdqu 96\(%rsi\), %xmm0
+** movups %xmm0, 96\(%rdi\)
+** movdqu 112\(%rsi\), %xmm0
+** movups %xmm0, 112\(%rdi\)
+** movzbl 128\(%rsi\), %eax
+** movb %al, 128\(%rdi\)
+** ret
+** .cfi_endproc
+**...
+*/
+/* Copy a compile-time-constant 129 bytes from SRC to DEST.  */
+void
+foo (char *dest, const char *src)
+{
+ __builtin_memcpy (dest, src, 129);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mtune=znver3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** movl \(%rsi\), %eax
+** movq %rdi, %rcx
+** leaq 4\(%rdi\), %rdi
+** movl %eax, -4\(%rdi\)
+** movl 125\(%rsi\), %eax
+** movl %eax, 121\(%rdi\)
+** andq \$-4, %rdi
+** subq %rdi, %rcx
+** subq %rcx, %rsi
+** addl \$129, %ecx
+** shrl \$2, %ecx
+** rep movsl
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memcpy-1a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$15, %rdx
+** jbe .L9
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L9:
+** cmpl \$8, %edx
+** jnb .L10
+** cmpl \$4, %edx
+** jnb .L5
+** cmpl \$1, %edx
+** ja .L6
+** jb .L1
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+/* Copy N bytes from B to A, but only when N is below 16, so the
+   __builtin_memcpy call is reached with a variable length of at most
+   15 bytes.  */
+void
+gcc_memcpy (void *a, void *b, __SIZE_TYPE__ n)
+{
+ if (n < 16)
+ __builtin_memcpy (a, b, n);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$15, %rdx
+** jbe .L10
+**.L8:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** cmpl \$8, %edx
+** jnb .L11
+** cmpl \$4, %edx
+** jnb .L5
+** cmpl \$1, %edx
+** ja .L6
+** jb .L8
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memcpy-2a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$15, %rdx
+** jbe .L10
+**.L8:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** cmpl \$8, %edx
+** jnb .L11
+** cmpl \$4, %edx
+** jnb .L5
+** cmpl \$1, %edx
+** ja .L6
+** jb .L8
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memcpy-2a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$31, %rdx
+** jbe .L10
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** cmpl \$16, %edx
+** jnb .L11
+** cmpl \$8, %edx
+** jnb .L5
+** cmpl \$4, %edx
+** jnb .L6
+** cmpl \$1, %edx
+** ja .L7
+** jb .L1
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movl %edx, %edx
+** movdqu \(%rsi\), %xmm1
+** movdqu -16\(%rsi,%rdx\), %xmm0
+** movups %xmm1, \(%rdi\)
+** movups %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+/* Copy N bytes from B to A, but only when N is below 32, so the
+   __builtin_memcpy call is reached with a variable length of at most
+   31 bytes.  */
+void
+gcc_memcpy (void *a, void *b, __SIZE_TYPE__ n)
+{
+ if (n < 32)
+ __builtin_memcpy (a, b, n);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$31, %rdx
+** jbe .L11
+**.L9:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** cmpl \$16, %edx
+** jnb .L12
+** cmpl \$8, %edx
+** jnb .L5
+** cmpl \$4, %edx
+** jnb .L6
+** cmpl \$1, %edx
+** ja .L7
+** jb .L9
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %xmm1
+** vmovdqu -16\(%rsi,%rdx\), %xmm0
+** vmovdqu %xmm1, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memcpy-3a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$31, %rdx
+** jbe .L11
+**.L9:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** cmpl \$16, %edx
+** jnb .L12
+** cmpl \$8, %edx
+** jnb .L5
+** cmpl \$4, %edx
+** jnb .L6
+** cmpl \$1, %edx
+** ja .L7
+** jb .L9
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %xmm1
+** vmovdqu -16\(%rsi,%rdx\), %xmm0
+** vmovdqu %xmm1, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memcpy-3a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$63, %rdx
+** jbe .L12
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** cmpl \$16, %edx
+** jnb .L13
+** cmpl \$8, %edx
+** jnb .L6
+** cmpl \$4, %edx
+** jnb .L7
+** cmpl \$1, %edx
+** ja .L8
+** jb .L1
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** cmpl \$32, %edx
+** ja .L5
+** movl %edx, %edx
+** movdqu \(%rsi\), %xmm1
+** movdqu -16\(%rsi,%rdx\), %xmm0
+** movups %xmm1, \(%rdi\)
+** movups %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movdqu \(%rsi\), %xmm3
+** movdqu 16\(%rsi\), %xmm2
+** addq %rdx, %rsi
+** movdqu -16\(%rsi\), %xmm1
+** movdqu -32\(%rsi\), %xmm0
+** movups %xmm3, \(%rdi\)
+** movups %xmm2, 16\(%rdi\)
+** movups %xmm1, -16\(%rdi,%rdx\)
+** movups %xmm0, -32\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+/* Copy N bytes from B to A, but only when N is below 64.  The guard is
+   what gives the builtin a known upper bound (63) on its size; the
+   expected assembly above starts with the matching compare.  */
+void
+gcc_memcpy (void *a, void *b, __SIZE_TYPE__ n)
+{
+ if (n < 64)
+ __builtin_memcpy (a, b, n);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$63, %rdx
+** jbe .L12
+**.L10:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** cmpl \$32, %edx
+** jnb .L13
+** cmpl \$16, %edx
+** jnb .L5
+** cmpl \$8, %edx
+** jnb .L6
+** cmpl \$4, %edx
+** jnb .L7
+** cmpl \$1, %edx
+** ja .L8
+** jb .L10
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %ymm1
+** vmovdqu -32\(%rsi,%rdx\), %ymm0
+** vmovdqu %ymm1, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %xmm1
+** vmovdqu -16\(%rsi,%rdx\), %xmm0
+** vmovdqu %xmm1, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+**...
+*/
+
+#include "builtin-memcpy-4a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$63, %rdx
+** jbe .L12
+**.L10:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** cmpl \$32, %edx
+** jnb .L13
+** cmpl \$16, %edx
+** jnb .L5
+** cmpl \$8, %edx
+** jnb .L6
+** cmpl \$4, %edx
+** jnb .L7
+** cmpl \$1, %edx
+** ja .L8
+** jb .L10
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %ymm1
+** vmovdqu -32\(%rsi,%rdx\), %ymm0
+** vmovdqu %ymm1, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %xmm1
+** vmovdqu -16\(%rsi,%rdx\), %xmm0
+** vmovdqu %xmm1, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+**...
+*/
+
+#include "builtin-memcpy-4a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$127, %rdx
+** jbe .L12
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** cmpl \$16, %edx
+** jnb .L13
+** cmpl \$8, %edx
+** jnb .L6
+** cmpl \$4, %edx
+** jnb .L7
+** cmpl \$1, %edx
+** ja .L8
+** jb .L1
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** cmpl \$32, %edx
+** ja .L5
+** movl %edx, %edx
+** movdqu \(%rsi\), %xmm1
+** movdqu -16\(%rsi,%rdx\), %xmm0
+** movups %xmm1, \(%rdi\)
+** movups %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** cmpl \$64, %edx
+** ja .L14
+** movl %edx, %edx
+** movdqu \(%rsi\), %xmm3
+** movdqu 16\(%rsi\), %xmm2
+** addq %rdx, %rsi
+** movdqu -16\(%rsi\), %xmm1
+** movdqu -32\(%rsi\), %xmm0
+** movups %xmm3, \(%rdi\)
+** movups %xmm2, 16\(%rdi\)
+** movups %xmm1, -16\(%rdi,%rdx\)
+** movups %xmm0, -32\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L14:
+** movl %edx, %edx
+** movdqu \(%rsi\), %xmm7
+** movdqu 16\(%rsi\), %xmm6
+** movdqu 32\(%rsi\), %xmm5
+** movdqu 48\(%rsi\), %xmm4
+** addq %rdx, %rsi
+** movdqu -16\(%rsi\), %xmm3
+** movdqu -32\(%rsi\), %xmm2
+** movdqu -48\(%rsi\), %xmm1
+** movdqu -64\(%rsi\), %xmm0
+** movups %xmm7, \(%rdi\)
+** movups %xmm6, 16\(%rdi\)
+** movups %xmm5, 32\(%rdi\)
+** movups %xmm4, 48\(%rdi\)
+** movups %xmm3, -16\(%rdi,%rdx\)
+** movups %xmm2, -32\(%rdi,%rdx\)
+** movups %xmm1, -48\(%rdi,%rdx\)
+** movups %xmm0, -64\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+/* Copy N bytes from B to A, but only when N is below 128.  The guard is
+   what gives the builtin a known upper bound (127) on its size; the
+   expected assembly above starts with the matching compare.  */
+void
+gcc_memcpy (void *a, void *b, __SIZE_TYPE__ n)
+{
+ if (n < 128)
+ __builtin_memcpy (a, b, n);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$127, %rdx
+** jbe .L14
+**.L12:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L14:
+** cmpl \$32, %edx
+** jnb .L15
+** cmpl \$16, %edx
+** jnb .L6
+** cmpl \$8, %edx
+** jnb .L7
+** cmpl \$4, %edx
+** jnb .L8
+** cmpl \$1, %edx
+** ja .L9
+** jb .L12
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L15:
+** cmpl \$64, %edx
+** ja .L5
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %ymm1
+** vmovdqu -32\(%rsi,%rdx\), %ymm0
+** vmovdqu %ymm1, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %ymm3
+** vmovdqu 32\(%rsi\), %ymm2
+** addq %rdx, %rsi
+** vmovdqu -32\(%rsi\), %ymm1
+** vmovdqu -64\(%rsi\), %ymm0
+** vmovdqu %ymm3, \(%rdi\)
+** vmovdqu %ymm2, 32\(%rdi\)
+** vmovdqu %ymm1, -32\(%rdi,%rdx\)
+** vmovdqu %ymm0, -64\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %xmm1
+** vmovdqu -16\(%rsi,%rdx\), %xmm0
+** vmovdqu %xmm1, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L9:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+**...
+*/
+
+#include "builtin-memcpy-5a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target { lp64 } } {^\t?\.} } } */
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$127, %rdx
+** jbe .L13
+**.L11:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** cmpl \$64, %edx
+** jnb .L14
+** cmpl \$32, %edx
+** jnb .L5
+** cmpl \$16, %edx
+** jnb .L6
+** cmpl \$8, %edx
+** jnb .L7
+** cmpl \$4, %edx
+** jnb .L8
+** cmpl \$1, %edx
+** ja .L9
+** jb .L11
+** movzbl \(%rsi\), %eax
+** movb %al, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L14:
+** movl %edx, %edx
+** vmovdqu64 \(%rsi\), %zmm1
+** vmovdqu64 -64\(%rsi,%rdx\), %zmm0
+** vmovdqu64 %zmm1, \(%rdi\)
+** vmovdqu64 %zmm0, -64\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %ymm1
+** vmovdqu -32\(%rsi,%rdx\), %ymm0
+** vmovdqu %ymm1, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** vmovdqu \(%rsi\), %xmm1
+** vmovdqu -16\(%rsi,%rdx\), %xmm0
+** vmovdqu %xmm1, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movq \(%rsi\), %rcx
+** movq -8\(%rsi,%rdx\), %rax
+** movq %rcx, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movl \(%rsi\), %ecx
+** movl -4\(%rsi,%rdx\), %eax
+** movl %ecx, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+**.L9:
+** movl %edx, %edx
+** movzwl \(%rsi\), %ecx
+** movzwl -2\(%rsi,%rdx\), %eax
+** movw %cx, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memcpy-5a.c"
--- /dev/null
+/* { dg-do compile { target { maybe_x32 && lp64 } } } */
+/* { dg-options "-O2 -mx32 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+
+/*
+**gcc_memcpy:
+**.LFB0:
+** .cfi_startproc
+** cmpl \$64, %edx
+** jnb .L2
+** testb \$32, %dl
+** jne .L19
+** testb \$16, %dl
+** jne .L20
+** testb \$8, %dl
+** jne .L21
+** testb \$4, %dl
+** jne .L22
+** testl %edx, %edx
+** jne .L23
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L2:
+** movdqu -64\(%edx,%esi\), %xmm0
+** subl \$1, %edx
+** movups %xmm0, -63\(%edx,%edi\)
+** movdqu -47\(%edx,%esi\), %xmm0
+** movups %xmm0, -47\(%edx,%edi\)
+** movdqu -31\(%edx,%esi\), %xmm0
+** movups %xmm0, -31\(%edx,%edi\)
+** movdqu -15\(%edx,%esi\), %xmm0
+** movups %xmm0, -15\(%edx,%edi\)
+** cmpl \$64, %edx
+** jb .L1
+** andl \$-64, %edx
+** xorl %eax, %eax
+**.L9:
+** movdqu \(%eax,%esi\), %xmm3
+** movdqu 16\(%eax,%esi\), %xmm2
+** addl \$64, %eax
+** movdqu -32\(%eax,%esi\), %xmm1
+** movdqu -16\(%eax,%esi\), %xmm0
+** movups %xmm3, -64\(%eax,%edi\)
+** movups %xmm2, -48\(%eax,%edi\)
+** movups %xmm1, -32\(%eax,%edi\)
+** movups %xmm0, -16\(%eax,%edi\)
+** cmpl %edx, %eax
+** jb .L9
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L23:
+** movzbl \(%esi\), %eax
+** movb %al, \(%edi\)
+** testb \$2, %dl
+** je .L1
+** movzwl -2\(%edx,%esi\), %eax
+** movw %ax, -2\(%edx,%edi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L19:
+** movdqu \(%esi\), %xmm0
+** movups %xmm0, \(%edi\)
+** movdqu 16\(%esi\), %xmm0
+** movups %xmm0, 16\(%edi\)
+** movdqu -32\(%edx,%esi\), %xmm0
+** movups %xmm0, -32\(%edx,%edi\)
+** movdqu -16\(%edx,%esi\), %xmm0
+** movups %xmm0, -16\(%edx,%edi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L20:
+** movdqu \(%esi\), %xmm0
+** movups %xmm0, \(%edi\)
+** movdqu -16\(%edx,%esi\), %xmm0
+** movups %xmm0, -16\(%edx,%edi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L21:
+** movq \(%esi\), %rax
+** movq %rax, \(%edi\)
+** movq -8\(%edx,%esi\), %rax
+** movq %rax, -8\(%edx,%edi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L22:
+** movl \(%esi\), %eax
+** movl %eax, \(%edi\)
+** movl -4\(%edx,%esi\), %eax
+** movl %eax, -4\(%edx,%edi\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+/* Copy N bytes from B to A.  The guard compares N against the largest
+   value of its own unsigned type, so it holds for every N and the copy
+   size stays unbounded.
+   NOTE(review): presumably the always-true guard is deliberate, to
+   exercise the unbounded expansion under -mx32; confirm.  */
+void
+gcc_memcpy (void *a, void *b, __SIZE_TYPE__ n)
+{
+ if (n <= (__SIZE_TYPE__) -1)
+ __builtin_memcpy (a, b, n);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -minline-all-stringops" } */
+
+/* Test -O2 -minline-all-stringops on memcpy with various bounds. */
+
+#include <stdlib.h>
+
+#define MAX_LENGTH 4096
+#define EXTRA 64
+#define TOTAL (EXTRA + MAX_LENGTH + EXTRA)
+
+/* Define memcpy_BOUND, a helper marked noipa and noinline.  It copies
+   LEN bytes from SRC to DEST + EXTRA when LEN is at most BOUND and
+   copies nothing otherwise.  Either way it returns DEST + EXTRA.  */
+#define MEMCPY_BOUND(BOUND) \
+__attribute__ ((noipa, noinline)) \
+static char * \
+memcpy_##BOUND (char *dest, char *src, size_t len) \
+{ \
+ if (len <= BOUND) \
+ return __builtin_memcpy (dest + EXTRA, src, len); \
+ else \
+ return dest + EXTRA; \
+}
+
+/* Run one check of memcpy_BOUND with length SIZE.  Fill dest with 'a'
+   and src with 'A', call the helper, then abort unless:
+     - the returned pointer is dest + EXTRA,
+     - the first SIZE bytes from there are 'A',
+     - every following byte up to the end of dest is still 'a',
+     - the EXTRA bytes at the start of dest are still 'a'.
+   Expects variables I and P to be declared by the enclosing function.  */
+#define CHECK_MEMCPY_BOUND(BOUND, SIZE) \
+ { \
+ for (i = 0; i < TOTAL; i++) \
+ { \
+ dest[i] = 'a'; \
+ src[i] = 'A'; \
+ } \
+ p = memcpy_##BOUND (dest, src, SIZE); \
+ if (p != dest + EXTRA) \
+ abort (); \
+ for (i = 0; i < SIZE; i++, p++) \
+ if (*p != 'A') \
+ abort (); \
+ for (; i < (TOTAL - EXTRA); i++, p++) \
+ if (*p != 'a') \
+ abort (); \
+ p = dest; \
+ for (i = 0; i < EXTRA; i++, p++) \
+ if (*p != 'a') \
+ abort (); \
+ }
+
+/* Check the helper for bound SIZE with length SIZE itself, then with
+   each of SIZE - 1 through SIZE - 7 that is still positive.  Each
+   "if (SIZE > k)" guard skips a length that would drop to zero or
+   below.  */
+#define CHECK_MEMCPY(SIZE) \
+ CHECK_MEMCPY_BOUND (SIZE, SIZE) \
+ if (SIZE > 1) \
+ CHECK_MEMCPY_BOUND (SIZE, SIZE - 1) \
+ if (SIZE > 2) \
+ CHECK_MEMCPY_BOUND (SIZE, SIZE - 2) \
+ if (SIZE > 3) \
+ CHECK_MEMCPY_BOUND (SIZE, SIZE - 3) \
+ if (SIZE > 4) \
+ CHECK_MEMCPY_BOUND (SIZE, SIZE - 4) \
+ if (SIZE > 5) \
+ CHECK_MEMCPY_BOUND (SIZE, SIZE - 5) \
+ if (SIZE > 6) \
+ CHECK_MEMCPY_BOUND (SIZE, SIZE - 6) \
+ if (SIZE > 7) \
+ CHECK_MEMCPY_BOUND (SIZE, SIZE - 7)
+
+/* Destination and source buffers shared by every check.  */
+char dest[TOTAL];
+char src[TOTAL];
+
+/* Instantiate one helper per tested bound: 0 through 5, then each power
+   of two from 8 to 256 together with its two neighbours.  */
+MEMCPY_BOUND (0);
+MEMCPY_BOUND (1);
+MEMCPY_BOUND (2);
+MEMCPY_BOUND (3);
+MEMCPY_BOUND (4);
+MEMCPY_BOUND (5);
+MEMCPY_BOUND (7);
+MEMCPY_BOUND (8);
+MEMCPY_BOUND (9);
+MEMCPY_BOUND (15);
+MEMCPY_BOUND (16);
+MEMCPY_BOUND (17);
+MEMCPY_BOUND (31);
+MEMCPY_BOUND (32);
+MEMCPY_BOUND (33);
+MEMCPY_BOUND (63);
+MEMCPY_BOUND (64);
+MEMCPY_BOUND (65);
+MEMCPY_BOUND (127);
+MEMCPY_BOUND (128);
+MEMCPY_BOUND (129);
+MEMCPY_BOUND (255);
+MEMCPY_BOUND (256);
+MEMCPY_BOUND (257);
+
+/* Run CHECK_MEMCPY for every bound that has a helper instantiated.  Any
+   mismatch aborts inside the macro; reaching the end returns 0.  */
+int
+main (void)
+{
+ /* Scratch index and cursor used by the CHECK_MEMCPY_BOUND expansion.  */
+ unsigned int i;
+ char *p;
+
+ CHECK_MEMCPY (0);
+ CHECK_MEMCPY (1);
+ CHECK_MEMCPY (2);
+ CHECK_MEMCPY (3);
+ CHECK_MEMCPY (4);
+ CHECK_MEMCPY (5);
+ CHECK_MEMCPY (7);
+ CHECK_MEMCPY (8);
+ CHECK_MEMCPY (9);
+ CHECK_MEMCPY (15);
+ CHECK_MEMCPY (16);
+ CHECK_MEMCPY (17);
+ CHECK_MEMCPY (31);
+ CHECK_MEMCPY (32);
+ CHECK_MEMCPY (33);
+ CHECK_MEMCPY (63);
+ CHECK_MEMCPY (64);
+ CHECK_MEMCPY (65);
+ CHECK_MEMCPY (127);
+ CHECK_MEMCPY (128);
+ CHECK_MEMCPY (129);
+ CHECK_MEMCPY (255);
+ CHECK_MEMCPY (256);
+ CHECK_MEMCPY (257);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O0 -minline-all-stringops" } */
+
+/* Test -O0 -minline-all-stringops on memcpy with various bounds. */
+
+#include "builtin-memcpy-bounded-1a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** movd %esi, %xmm0
+** movb %sil, 128\(%rdi\)
+** punpcklbw %xmm0, %xmm0
+** punpcklwd %xmm0, %xmm0
+** pshufd \$0, %xmm0, %xmm0
+** movups %xmm0, \(%rdi\)
+** movups %xmm0, 16\(%rdi\)
+** movups %xmm0, 32\(%rdi\)
+** movups %xmm0, 48\(%rdi\)
+** movups %xmm0, 64\(%rdi\)
+** movups %xmm0, 80\(%rdi\)
+** movups %xmm0, 96\(%rdi\)
+** movups %xmm0, 112\(%rdi\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+/* Fill 129 bytes starting at DEST with the byte value C.  The length is
+   a compile-time constant, one byte past a multiple of 16.  */
+void
+foo (char *dest, int c)
+{
+ __builtin_memset (dest, c, 129);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mtune=znver3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** movzbl %sil, %eax
+** movabsq \$72340172838076673, %rdx
+** movq %rdi, %rcx
+** leaq 8\(%rdi\), %rdi
+** imulq %rdx, %rax
+** movq %rax, -8\(%rdi\)
+** movq %rax, 113\(%rdi\)
+** andq \$-8, %rdi
+** subq %rdi, %rcx
+** addl \$129, %ecx
+** shrl \$3, %ecx
+** rep stosq
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-1a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$15, %rdx
+** jbe .L9
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L9:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** cmpl \$8, %edx
+** jnb .L10
+** cmpl \$4, %edx
+** jnb .L5
+** cmpl \$1, %edx
+** ja .L6
+** jb .L1
+** movb %sil, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+/* Fill N bytes at DEST with the byte value C, but only when N is below
+   16, so the builtin sees a size bounded by 15.  */
+void
+foo (char *dest, int c, __SIZE_TYPE__ n)
+{
+ if (n < 16)
+ __builtin_memset (dest, c, n);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$15, %rdx
+** jbe .L10
+**.L8:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** cmpl \$8, %edx
+** jnb .L11
+** cmpl \$4, %edx
+** jnb .L5
+** cmpl \$1, %edx
+** ja .L6
+** jb .L8
+** movb %sil, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-2a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$15, %rdx
+** jbe .L10
+**.L8:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** cmpl \$8, %edx
+** jnb .L11
+** cmpl \$4, %edx
+** jnb .L5
+** cmpl \$1, %edx
+** ja .L6
+** jb .L8
+** movb %sil, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-2a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$31, %rdx
+** jbe .L10
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** movq %rax, %xmm0
+** punpcklqdq %xmm0, %xmm0
+** cmpl \$16, %edx
+** jnb .L11
+** cmpl \$8, %edx
+** jnb .L5
+** cmpl \$4, %edx
+** jnb .L6
+** cmpl \$1, %edx
+** ja .L7
+** jb .L1
+** movb %sil, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movl %edx, %edx
+** movups %xmm0, \(%rdi\)
+** movups %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+/* Fill N bytes at DEST with the byte value C, but only when N is below
+   32, so the builtin sees a size bounded by 31.  */
+void
+foo (char *dest, int c, __SIZE_TYPE__ n)
+{
+ if (n < 32)
+ __builtin_memset (dest, c, n);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$31, %rdx
+** jbe .L11
+**.L9:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** vmovq %rax, %xmm1
+** vpunpcklqdq %xmm1, %xmm1, %xmm0
+** cmpl \$16, %edx
+** jnb .L12
+** cmpl \$8, %edx
+** jnb .L5
+** cmpl \$4, %edx
+** jnb .L6
+** cmpl \$1, %edx
+** ja .L7
+** jb .L9
+** movb %sil, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** movl %edx, %edx
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-3a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
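+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$31, %rdx
+** jbe .L11
+**.L9:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** vpbroadcastq %rax, %xmm0
+** cmpl \$16, %edx
+** jnb .L12
+** cmpl \$8, %edx
+** jnb .L5
+** cmpl \$4, %edx
+** jnb .L6
+** cmpl \$1, %edx
+** ja .L7
+** jb .L9
+** movb %sil, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** movl %edx, %edx
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/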
+
+#include "builtin-memset-3a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$63, %rdx
+** jbe .L12
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** movq %rax, %xmm0
+** punpcklqdq %xmm0, %xmm0
+** cmpl \$16, %edx
+** jnb .L13
+** cmpl \$8, %edx
+** jnb .L6
+** cmpl \$4, %edx
+** jnb .L7
+** cmpl \$1, %edx
+** ja .L8
+** jb .L1
+** movb %sil, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** movups %xmm0, \(%rdi\)
+** cmpl \$32, %edx
+** ja .L5
+** movl %edx, %edx
+** movups %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** movups %xmm0, 16\(%rdi\)
+** movups %xmm0, -16\(%rdi,%rdx\)
+** movups %xmm0, -32\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+/* Fill N bytes at DEST with the byte value C, but only when N is below
+   64, so the builtin sees a size bounded by 63.  */
+void
+foo (char *dest, int c, __SIZE_TYPE__ n)
+{
+ if (n < 64)
+ __builtin_memset (dest, c, n);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$63, %rdx
+** jbe .L13
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** vmovq %rax, %xmm1
+** vpbroadcastq %xmm1, %ymm0
+** cmpl \$32, %edx
+** jnb .L14
+** cmpl \$16, %edx
+** jnb .L5
+** cmpl \$8, %edx
+** jnb .L6
+** cmpl \$4, %edx
+** jnb .L7
+** cmpl \$1, %edx
+** ja .L8
+** jb .L10
+** movb %sil, \(%rdi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L14:
+** movl %edx, %edx
+** vmovdqu %ymm0, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** vzeroupper
+** ret
+**.L10:
+** vzeroupper
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-4a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$63, %rdx
+** jbe .L13
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** vpbroadcastq %rax, %ymm0
+** cmpl \$32, %edx
+** jnb .L14
+** cmpl \$16, %edx
+** jnb .L5
+** cmpl \$8, %edx
+** jnb .L6
+** cmpl \$4, %edx
+** jnb .L7
+** cmpl \$1, %edx
+** ja .L8
+** jb .L10
+** movb %sil, \(%rdi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L14:
+** movl %edx, %edx
+** vmovdqu %ymm0, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** vzeroupper
+** ret
+**.L10:
+** vzeroupper
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-4a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$127, %rdx
+** jbe .L12
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L12:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** movq %rax, %xmm0
+** punpcklqdq %xmm0, %xmm0
+** cmpl \$16, %edx
+** jnb .L13
+** cmpl \$8, %edx
+** jnb .L6
+** cmpl \$4, %edx
+** jnb .L7
+** cmpl \$1, %edx
+** ja .L8
+** jb .L1
+** movb %sil, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L13:
+** movups %xmm0, \(%rdi\)
+** cmpl \$32, %edx
+** ja .L5
+** movl %edx, %edx
+** movups %xmm0, -16\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movups %xmm0, 16\(%rdi\)
+** cmpl \$64, %edx
+** ja .L14
+** movl %edx, %edx
+** movups %xmm0, -16\(%rdi,%rdx\)
+** movups %xmm0, -32\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L14:
+** movl %edx, %edx
+** movups %xmm0, 32\(%rdi\)
+** movups %xmm0, 48\(%rdi\)
+** movups %xmm0, -16\(%rdi,%rdx\)
+** movups %xmm0, -32\(%rdi,%rdx\)
+** movups %xmm0, -48\(%rdi,%rdx\)
+** movups %xmm0, -64\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+/* Fill N bytes at DEST with the byte value C, but only when N is below
+   128, so the builtin sees a size bounded by 127.  */
+void
+foo (char *dest, int c, __SIZE_TYPE__ n)
+{
+ if (n < 128)
+ __builtin_memset (dest, c, n);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$127, %rdx
+** jbe .L15
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L15:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** vmovq %rax, %xmm1
+** vpbroadcastq %xmm1, %ymm0
+** cmpl \$32, %edx
+** jnb .L16
+** cmpl \$16, %edx
+** jnb .L6
+** cmpl \$8, %edx
+** jnb .L7
+** cmpl \$4, %edx
+** jnb .L8
+** cmpl \$1, %edx
+** ja .L9
+** jb .L12
+** movb %sil, \(%rdi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L16:
+** vmovdqu %ymm0, \(%rdi\)
+** cmpl \$64, %edx
+** ja .L5
+** movl %edx, %edx
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** vmovdqu %ymm0, 32\(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vmovdqu %ymm0, -64\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L9:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** vzeroupper
+** ret
+**.L12:
+** vzeroupper
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-5a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$127, %rdx
+** jbe .L14
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L14:
+** movabsq \$72340172838076673, %rcx
+** movzbl %sil, %eax
+** imulq %rcx, %rax
+** vpbroadcastq %rax, %zmm0
+** cmpl \$64, %edx
+** jnb .L15
+** cmpl \$32, %edx
+** jnb .L5
+** cmpl \$16, %edx
+** jnb .L6
+** cmpl \$8, %edx
+** jnb .L7
+** cmpl \$4, %edx
+** jnb .L8
+** cmpl \$1, %edx
+** ja .L9
+** jb .L11
+** movb %sil, \(%rdi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L15:
+** movl %edx, %edx
+** vmovdqu8 %zmm0, \(%rdi\)
+** vmovdqu8 %zmm0, -64\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %edx, %edx
+** vmovdqu %ymm0, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %edx, %edx
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %edx, %edx
+** movq %rax, \(%rdi\)
+** movq %rax, -8\(%rdi,%rdx\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %edx, %edx
+** movl %eax, \(%rdi\)
+** movl %eax, -4\(%rdi,%rdx\)
+** vzeroupper
+** ret
+**.L9:
+** movl %edx, %edx
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rdx\)
+** vzeroupper
+** ret
+**.L11:
+** vzeroupper
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-5a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$247, %rsi
+** jbe .L16
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L16:
+** movq %rdi, %rax
+** cmpl \$16, %esi
+** jnb .L17
+** cmpl \$8, %esi
+** jnb .L6
+** cmpl \$4, %esi
+** jnb .L7
+** cmpl \$1, %esi
+** ja .L8
+** jb .L1
+** movb \$0, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L17:
+** pxor %xmm0, %xmm0
+** cmpl \$32, %esi
+** ja .L5
+** movl %esi, %esi
+** movups %xmm0, \(%rdi\)
+** movups %xmm0, -16\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** cmpl \$128, %esi
+** ja .L10
+** movups %xmm0, \(%rdi\)
+** movups %xmm0, 16\(%rdi\)
+** cmpl \$64, %esi
+** jbe .L11
+** movups %xmm0, 32\(%rdi\)
+** movups %xmm0, 48\(%rdi\)
+**.L14:
+** movl %esi, %esi
+** movups %xmm0, -16\(%rdi,%rsi\)
+** movups %xmm0, -32\(%rdi,%rsi\)
+** movups %xmm0, -48\(%rdi,%rsi\)
+** movups %xmm0, -64\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %esi, %esi
+** movq \$0, \(%rdi\)
+** movq \$0, -8\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** movl %esi, %edx
+**.L12:
+** subl \$64, %edx
+** movups %xmm0, \(%rax\)
+** addq \$64, %rax
+** movups %xmm0, -48\(%rax\)
+** movups %xmm0, -32\(%rax\)
+** movups %xmm0, -16\(%rax\)
+** cmpl \$64, %edx
+** ja .L12
+** jmp .L14
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %esi, %esi
+** movl \$0, \(%rdi\)
+** movl \$0, -4\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movl %esi, %esi
+** movups %xmm0, -16\(%rdi,%rsi\)
+** movups %xmm0, -32\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** xorl %eax, %eax
+** movl %esi, %esi
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rsi\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+void
+foo (char *dst, __SIZE_TYPE__ n)
+{
+ if (n <= 247) /* The length is known to be at most 247 past this guard.  */
+ __builtin_memset(dst, 0, n); /* Constant all-zero fill value.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$247, %rsi
+** jbe .L14
+**.L12:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L14:
+** vpxor %xmm0, %xmm0, %xmm0
+** cmpl \$32, %esi
+** jnb .L15
+** cmpl \$16, %esi
+** jnb .L6
+** cmpl \$8, %esi
+** jnb .L7
+** cmpl \$4, %esi
+** jnb .L8
+** cmpl \$1, %esi
+** ja .L9
+** jb .L12
+** movb \$0, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L15:
+** vmovdqu %ymm0, \(%rdi\)
+** cmpl \$64, %esi
+** ja .L5
+** movl %esi, %esi
+** vmovdqu %ymm0, -32\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** vmovdqu %ymm0, 32\(%rdi\)
+** cmpl \$128, %esi
+** ja .L16
+** movl %esi, %esi
+** vmovdqu %ymm0, -32\(%rdi,%rsi\)
+** vmovdqu %ymm0, -64\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %esi, %esi
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L16:
+** movl %esi, %esi
+** vmovdqu %ymm0, 64\(%rdi\)
+** vmovdqu %ymm0, 96\(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rsi\)
+** vmovdqu %ymm0, -64\(%rdi,%rsi\)
+** vmovdqu %ymm0, -96\(%rdi,%rsi\)
+** vmovdqu %ymm0, -128\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %esi, %esi
+** movq \$0, \(%rdi\)
+** movq \$0, -8\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %esi, %esi
+** movl \$0, \(%rdi\)
+** movl \$0, -4\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L9:
+** xorl %eax, %eax
+** movl %esi, %esi
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rsi\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-6a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$247, %rsi
+** jbe .L15
+**.L13:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L15:
+** vpxor %xmm0, %xmm0, %xmm0
+** cmpl \$64, %esi
+** jnb .L16
+** cmpl \$32, %esi
+** jnb .L6
+** cmpl \$16, %esi
+** jnb .L7
+** cmpl \$8, %esi
+** jnb .L8
+** cmpl \$4, %esi
+** jnb .L9
+** cmpl \$1, %esi
+** ja .L10
+** jb .L13
+** movb \$0, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L16:
+** vmovdqu8 %zmm0, \(%rdi\)
+** cmpl \$128, %esi
+** ja .L5
+** movl %esi, %esi
+** vmovdqu8 %zmm0, -64\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %esi, %esi
+** vmovdqu8 %zmm0, 64\(%rdi\)
+** vmovdqu8 %zmm0, -64\(%rdi,%rsi\)
+** vmovdqu8 %zmm0, -128\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %esi, %esi
+** vmovdqu %ymm0, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %esi, %esi
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %esi, %esi
+** movq \$0, \(%rdi\)
+** movq \$0, -8\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L9:
+** movl %esi, %esi
+** movl \$0, \(%rdi\)
+** movl \$0, -4\(%rdi,%rsi\)
+** ret
+**.L10:
+** xorl %eax, %eax
+** movl %esi, %esi
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rsi\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-6a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$247, %rsi
+** jbe .L16
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L16:
+** movq %rdi, %rax
+** cmpl \$16, %esi
+** jnb .L17
+** cmpl \$8, %esi
+** jnb .L6
+** cmpl \$4, %esi
+** jnb .L7
+** cmpl \$1, %esi
+** ja .L8
+** jb .L1
+** movb \$-1, \(%rdi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L17:
+** pcmpeqd %xmm0, %xmm0
+** cmpl \$32, %esi
+** ja .L5
+** movl %esi, %esi
+** movups %xmm0, \(%rdi\)
+** movups %xmm0, -16\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** cmpl \$128, %esi
+** ja .L10
+** movups %xmm0, \(%rdi\)
+** movups %xmm0, 16\(%rdi\)
+** cmpl \$64, %esi
+** jbe .L11
+** movups %xmm0, 32\(%rdi\)
+** movups %xmm0, 48\(%rdi\)
+**.L14:
+** movl %esi, %esi
+** movups %xmm0, -16\(%rdi,%rsi\)
+** movups %xmm0, -32\(%rdi,%rsi\)
+** movups %xmm0, -48\(%rdi,%rsi\)
+** movups %xmm0, -64\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %esi, %esi
+** movq \$-1, \(%rdi\)
+** movq \$-1, -8\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L10:
+** movl %esi, %edx
+**.L12:
+** subl \$64, %edx
+** movups %xmm0, \(%rax\)
+** addq \$64, %rax
+** movups %xmm0, -48\(%rax\)
+** movups %xmm0, -32\(%rax\)
+** movups %xmm0, -16\(%rax\)
+** cmpl \$64, %edx
+** ja .L12
+** jmp .L14
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %esi, %esi
+** movl \$-1, \(%rdi\)
+** movl \$-1, -4\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L11:
+** movl %esi, %esi
+** movups %xmm0, -16\(%rdi,%rsi\)
+** movups %xmm0, -32\(%rdi,%rsi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl \$-1, %eax
+** movl %esi, %esi
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rsi\)
+** ret
+**...
+*/
+
+void
+foo (char *dst, __SIZE_TYPE__ n)
+{
+ if (n <= 247) /* The length is known to be at most 247 past this guard.  */
+ __builtin_memset(dst, -1, n); /* Constant fill value with every bit set.  */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$247, %rsi
+** jbe .L15
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L15:
+** vpcmpeqd %ymm0, %ymm0, %ymm0
+** cmpl \$32, %esi
+** jnb .L16
+** cmpl \$16, %esi
+** jnb .L6
+** cmpl \$8, %esi
+** jnb .L7
+** cmpl \$4, %esi
+** jnb .L8
+** cmpl \$1, %esi
+** ja .L9
+** jb .L12
+** movb \$-1, \(%rdi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L16:
+** vmovdqu %ymm0, \(%rdi\)
+** cmpl \$64, %esi
+** ja .L5
+** movl %esi, %esi
+** vmovdqu %ymm0, -32\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** vmovdqu %ymm0, 32\(%rdi\)
+** cmpl \$128, %esi
+** ja .L17
+** movl %esi, %esi
+** vmovdqu %ymm0, -32\(%rdi,%rsi\)
+** vmovdqu %ymm0, -64\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %esi, %esi
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L17:
+** movl %esi, %esi
+** vmovdqu %ymm0, 64\(%rdi\)
+** vmovdqu %ymm0, 96\(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rsi\)
+** vmovdqu %ymm0, -64\(%rdi,%rsi\)
+** vmovdqu %ymm0, -96\(%rdi,%rsi\)
+** vmovdqu %ymm0, -128\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %esi, %esi
+** movq \$-1, \(%rdi\)
+** movq \$-1, -8\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %esi, %esi
+** movl \$-1, \(%rdi\)
+** movl \$-1, -4\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L9:
+** movl \$-1, %eax
+** movl %esi, %esi
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rsi\)
+** vzeroupper
+** ret
+**.L12:
+** vzeroupper
+** ret
+**...
+*/
+
+#include "builtin-memset-7a.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** cmpq \$247, %rsi
+** jbe .L16
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L16:
+** vpxor %xmm0, %xmm0, %xmm0
+** vpternlogd \$0xFF, %zmm0, %zmm0, %zmm0
+** cmpl \$64, %esi
+** jnb .L17
+** cmpl \$32, %esi
+** jnb .L6
+** cmpl \$16, %esi
+** jnb .L7
+** cmpl \$8, %esi
+** jnb .L8
+** cmpl \$4, %esi
+** jnb .L9
+** cmpl \$1, %esi
+** ja .L10
+** jb .L13
+** movb \$-1, \(%rdi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L17:
+** vmovdqu8 %zmm0, \(%rdi\)
+** cmpl \$128, %esi
+** ja .L5
+** movl %esi, %esi
+** vmovdqu8 %zmm0, -64\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L5:
+** movl %esi, %esi
+** vmovdqu8 %zmm0, 64\(%rdi\)
+** vmovdqu8 %zmm0, -64\(%rdi,%rsi\)
+** vmovdqu8 %zmm0, -128\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L6:
+** movl %esi, %esi
+** vmovdqu %ymm0, \(%rdi\)
+** vmovdqu %ymm0, -32\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L7:
+** movl %esi, %esi
+** vmovdqu %xmm0, \(%rdi\)
+** vmovdqu %xmm0, -16\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L8:
+** movl %esi, %esi
+** movq \$-1, \(%rdi\)
+** movq \$-1, -8\(%rdi,%rsi\)
+** vzeroupper
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L9:
+** movl %esi, %esi
+** movl \$-1, \(%rdi\)
+** movl \$-1, -4\(%rdi,%rsi\)
+** vzeroupper
+** ret
+**.L10:
+** movl \$-1, %eax
+** movl %esi, %esi
+** movw %ax, \(%rdi\)
+** movw %ax, -2\(%rdi,%rsi\)
+** vzeroupper
+** ret
+**.L13:
+** vzeroupper
+** ret
+** .cfi_endproc
+**...
+*/
+
+#include "builtin-memset-7a.c"
--- /dev/null
+/* { dg-do compile { target { maybe_x32 && lp64 } } } */
+/* { dg-options "-O2 -mx32 -march=x86-64 -minline-all-stringops" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB0:
+** .cfi_startproc
+** movabsq \$72340172838076673, %rax
+** movzbl %sil, %esi
+** imulq %rax, %rsi
+** movq %rsi, %xmm0
+** punpcklqdq %xmm0, %xmm0
+** cmpl \$64, %edx
+** jnb .L2
+** testb \$32, %dl
+** jne .L19
+** testb \$16, %dl
+** jne .L20
+** testb \$8, %dl
+** jne .L21
+** testb \$4, %dl
+** jne .L22
+** testl %edx, %edx
+** jne .L23
+**.L1:
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L2:
+** movups %xmm0, -64\(%edx,%edi\)
+** subl \$1, %edx
+** movups %xmm0, -47\(%edx,%edi\)
+** movups %xmm0, -31\(%edx,%edi\)
+** movups %xmm0, -15\(%edx,%edi\)
+** cmpl \$64, %edx
+** jb .L1
+** andl \$-64, %edx
+** xorl %eax, %eax
+**.L9:
+** movups %xmm0, \(%eax,%edi\)
+** addl \$64, %eax
+** movups %xmm0, -48\(%eax,%edi\)
+** movups %xmm0, -32\(%eax,%edi\)
+** movups %xmm0, -16\(%eax,%edi\)
+** cmpl %edx, %eax
+** jb .L9
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L23:
+** movb %sil, \(%edi\)
+** testb \$2, %dl
+** je .L1
+** movw %si, -2\(%edx,%edi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L19:
+** movups %xmm0, \(%edi\)
+** movups %xmm0, 16\(%edi\)
+** movups %xmm0, -32\(%edx,%edi\)
+** movups %xmm0, -16\(%edx,%edi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L20:
+** movups %xmm0, \(%edi\)
+** movups %xmm0, -16\(%edx,%edi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L21:
+** movq %rsi, \(%edi\)
+** movq %rsi, -8\(%edx,%edi\)
+** ret
+** .p2align 4,,10
+** .p2align 3
+**.L22:
+** movl %esi, \(%edi\)
+** movl %esi, -4\(%edx,%edi\)
+** ret
+** .cfi_endproc
+**...
+*/
+
+void
+foo (char *dst, int c, __SIZE_TYPE__ n)
+{
+ if (n <= (__SIZE_TYPE__) -1) /* Always true: the bound is the largest __SIZE_TYPE__ value.  */
+ __builtin_memset (dst, c, n); /* The fill value C is a run-time value.  */
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -minline-all-stringops" } */
+
+/* Test -O2 -minline-all-stringops on memset with various bounds. */
+
+#include <stdlib.h>
+
+#define MAX_LENGTH 4096 /* Bytes between the two EXTRA-sized margins of the buffer.  */
+#define EXTRA 64 /* Size of the margin kept before and after those bytes.  */
+#define TOTAL (EXTRA + MAX_LENGTH + EXTRA) /* Whole buffer: margin, middle, margin.  */
+
+#define MEMSET_BOUND(BOUND) /* Defines the helper memset_<BOUND> (buf, len).  */ \
+__attribute__ ((noipa, noinline)) \
+static char * \
+memset_##BOUND (char *buf, size_t len) \
+{ \
+ if (len <= BOUND) /* Only lengths up to BOUND reach the memset.  */ \
+ return __builtin_memset (buf + EXTRA, 'A', len); /* Fills starting EXTRA bytes into BUF.  */ \
+ else \
+ return buf + EXTRA; /* Larger lengths: same pointer, nothing written.  */ \
+}
+
+#define CHECK_MEMSET_BOUND(BOUND, SIZE) \
+  { \
+    /* Fill the entire buffer, both margins included, with 'a'.  */ \
+    i = 0; \
+    while (i < TOTAL) \
+      buf[i++] = 'a'; \
+    /* The helper must hand back the address EXTRA bytes into BUF.  */ \
+    p = memset_##BOUND (buf, SIZE); \
+    if (p != buf + EXTRA) \
+      abort (); \
+    /* The first SIZE bytes from there must have been set to 'A'.  */ \
+    i = 0; \
+    while (i < SIZE) \
+      { \
+	if (*p != 'A') \
+	  abort (); \
+	i++; \
+	p++; \
+      } \
+    /* Everything after them, up to the end of BUF, must be untouched.  */ \
+    while (i < (TOTAL - EXTRA)) \
+      { \
+	if (*p != 'a') \
+	  abort (); \
+	i++; \
+	p++; \
+      } \
+    /* The EXTRA bytes in front of the filled area must be untouched too.  */ \
+    p = buf; \
+    i = 0; \
+    while (i < EXTRA) \
+      { \
+	if (*p != 'a') \
+	  abort (); \
+	i++; \
+	p++; \
+      } \
+  }
+
+/* Check the helper for bound SIZE with every length from SIZE down to
+   SIZE - 7, skipping the lengths that would be negative.  The guards
+   compare with >= so that the zero-length call is exercised for the
+   bounds 1 through 7 as well as for bound 0.  Expands to a sequence of
+   statements that use the variables I and P of the enclosing scope.  */
+#define CHECK_MEMSET(SIZE) \
+  CHECK_MEMSET_BOUND (SIZE, SIZE) \
+  if (SIZE >= 1) \
+    CHECK_MEMSET_BOUND (SIZE, SIZE - 1) \
+  if (SIZE >= 2) \
+    CHECK_MEMSET_BOUND (SIZE, SIZE - 2) \
+  if (SIZE >= 3) \
+    CHECK_MEMSET_BOUND (SIZE, SIZE - 3) \
+  if (SIZE >= 4) \
+    CHECK_MEMSET_BOUND (SIZE, SIZE - 4) \
+  if (SIZE >= 5) \
+    CHECK_MEMSET_BOUND (SIZE, SIZE - 5) \
+  if (SIZE >= 6) \
+    CHECK_MEMSET_BOUND (SIZE, SIZE - 6) \
+  if (SIZE >= 7) \
+    CHECK_MEMSET_BOUND (SIZE, SIZE - 7)
+
+char buf[TOTAL]; /* Shared scratch buffer refilled before every check.  */
+
+MEMSET_BOUND (0); /* One memset_<N> helper for each CHECK_MEMSET (N) in main.  */
+MEMSET_BOUND (1);
+MEMSET_BOUND (2);
+MEMSET_BOUND (3);
+MEMSET_BOUND (4);
+MEMSET_BOUND (5);
+MEMSET_BOUND (7);
+MEMSET_BOUND (8);
+MEMSET_BOUND (9);
+MEMSET_BOUND (15);
+MEMSET_BOUND (16);
+MEMSET_BOUND (17);
+MEMSET_BOUND (31);
+MEMSET_BOUND (32);
+MEMSET_BOUND (33);
+MEMSET_BOUND (63);
+MEMSET_BOUND (64);
+MEMSET_BOUND (65);
+MEMSET_BOUND (127);
+MEMSET_BOUND (128);
+MEMSET_BOUND (129);
+MEMSET_BOUND (255);
+MEMSET_BOUND (256);
+MEMSET_BOUND (257);
+
+int
+main (void)
+{
+ unsigned int i; /* Byte counter assigned by the CHECK_MEMSET expansions.  */
+ char *p; /* Buffer cursor assigned by the CHECK_MEMSET expansions.  */
+
+ CHECK_MEMSET (0); /* Each line checks memset_<N> with N and a few smaller lengths.  */
+ CHECK_MEMSET (1);
+ CHECK_MEMSET (2);
+ CHECK_MEMSET (3);
+ CHECK_MEMSET (4);
+ CHECK_MEMSET (5);
+ CHECK_MEMSET (7);
+ CHECK_MEMSET (8);
+ CHECK_MEMSET (9);
+ CHECK_MEMSET (15);
+ CHECK_MEMSET (16);
+ CHECK_MEMSET (17);
+ CHECK_MEMSET (31);
+ CHECK_MEMSET (32);
+ CHECK_MEMSET (33);
+ CHECK_MEMSET (63);
+ CHECK_MEMSET (64);
+ CHECK_MEMSET (65);
+ CHECK_MEMSET (127);
+ CHECK_MEMSET (128);
+ CHECK_MEMSET (129);
+ CHECK_MEMSET (255);
+ CHECK_MEMSET (256);
+ CHECK_MEMSET (257);
+
+ return 0; /* Reached only if no check called abort.  */
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O0 -minline-all-stringops" } */
+
+/* Test -O0 -minline-all-stringops on memset with various bounds. */
+
+#include "builtin-memset-bounded-1a.c"