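-/* Return true if ALG can be used in current context.
- Assume we expand memset if MEMSET is true. */
+/* Return true if ALG can be used in current context.
+ Assume we expand memset if MEMSET is true. DST_AS and SRC_AS are the
+ address spaces of the destination and source memory. */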
static bool
-alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
+alg_usable_p (enum stringop_alg alg, bool memset,
+ addr_space_t dst_as, addr_space_t src_as)
{
if (alg == no_stringop)
return false;
/* It is not possible to use a library call if we have non-default
address space. We can do better than the generic byte-at-a-time
loop, used as a fallback. */
- if (alg == libcall && have_as)
+ if (alg == libcall &&
+ !(ADDR_SPACE_GENERIC_P (dst_as) && ADDR_SPACE_GENERIC_P (src_as)))
return false;
if (alg == vector_loop)
return TARGET_SSE || TARGET_AVX;
/* Algorithms using the rep prefix want at least edi and ecx;
additionally, memset wants eax and memcpy wants esi. Don't
consider such algorithms if the user has appropriated those
- registers for their own purposes, or if we have a non-default
- address space, since some string insns cannot override the segment. */
+ registers for their own purposes, or if we have the destination
+ in the non-default address space, since string insns cannot
+ override the destination segment. */
if (alg == rep_prefix_1_byte
|| alg == rep_prefix_4_byte
|| alg == rep_prefix_8_byte)
{
- if (have_as)
- return false;
if (fixed_regs[CX_REG]
|| fixed_regs[DI_REG]
- || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
+ || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])
+ || !ADDR_SPACE_GENERIC_P (dst_as))
return false;
}
return true;
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
- bool memset, bool zero_memset, bool have_as,
- int *dynamic_check, bool *noalign, bool recur)
+ bool memset, bool zero_memset, addr_space_t dst_as,
+ addr_space_t src_as, int *dynamic_check, bool *noalign, bool recur)
{
const struct stringop_algs *algs;
bool optimize_for_speed;
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
{
enum stringop_alg candidate = algs->size[i].alg;
- bool usable = alg_usable_p (candidate, memset, have_as);
+ bool usable = alg_usable_p (candidate, memset, dst_as, src_as);
any_alg_usable_p |= usable;
if (candidate != libcall && candidate && usable)
/* If user specified the algorithm, honor it if possible. */
if (ix86_stringop_alg != no_stringop
- && alg_usable_p (ix86_stringop_alg, memset, have_as))
+ && alg_usable_p (ix86_stringop_alg, memset, dst_as, src_as))
return ix86_stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
else if (!optimize_for_speed)
{
*noalign = true;
if (!count || (count & 3) || (memset && !zero_memset))
- return alg_usable_p (rep_prefix_1_byte, memset, have_as)
+ return alg_usable_p (rep_prefix_1_byte, memset, dst_as, src_as)
? rep_prefix_1_byte : loop_1_byte;
else
- return alg_usable_p (rep_prefix_4_byte, memset, have_as)
+ return alg_usable_p (rep_prefix_4_byte, memset, dst_as, src_as)
? rep_prefix_4_byte : loop;
}
/* Very tiny blocks are best handled via the loop, REP is expensive to
enum stringop_alg candidate = algs->size[i].alg;
if (candidate != libcall
- && alg_usable_p (candidate, memset, have_as))
+ && alg_usable_p (candidate, memset, dst_as, src_as))
{
alg = candidate;
alg_noalign = algs->size[i].noalign;
else if (!any_alg_usable_p)
break;
}
- else if (alg_usable_p (candidate, memset, have_as)
+ else if (alg_usable_p (candidate, memset, dst_as, src_as)
&& !(TARGET_PREFER_KNOWN_REP_MOVSB_STOSB
&& candidate == rep_prefix_1_byte
/* NB: If min_size != max_size, size is
choice in ix86_costs. */
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
&& (algs->unknown_size == libcall
- || !alg_usable_p (algs->unknown_size, memset, have_as)))
+ || !alg_usable_p (algs->unknown_size, memset, dst_as, src_as)))
{
enum stringop_alg alg;
HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
*dynamic_check = 128;
return loop_1_byte;
}
- alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
- zero_memset, have_as, dynamic_check, noalign, true);
+ alg = decide_alg (count, new_expected_size, min_size, max_size,
+ memset, zero_memset, dst_as, src_as,
+ dynamic_check, noalign, true);
gcc_assert (*dynamic_check == -1);
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
*dynamic_check = max;
/* Try to use some reasonable fallback algorithm. Note that for
non-default address spaces we default to a loop instead of
a libcall. */
- return (alg_usable_p (algs->unknown_size, memset, have_as)
+
+ bool have_as = !(ADDR_SPACE_GENERIC_P (dst_as)
+ && ADDR_SPACE_GENERIC_P (src_as));
+
+ return (alg_usable_p (algs->unknown_size, memset, dst_as, src_as)
? algs->unknown_size : have_as ? loop : libcall);
}
unsigned HOST_WIDE_INT max_size = -1;
unsigned HOST_WIDE_INT probable_max_size = -1;
bool misaligned_prologue_used = false;
- bool have_as;
+ addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC;
if (CONST_INT_P (align_exp))
align = INTVAL (align_exp);
if (count > (HOST_WIDE_INT_1U << 30))
return false;
- have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
+ dst_as = MEM_ADDR_SPACE (dst);
if (!issetmem)
- have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
+ src_as = MEM_ADDR_SPACE (src);
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
alg = decide_alg (count, expected_size, min_size, probable_max_size,
- issetmem,
- issetmem && val_exp == const0_rtx, have_as,
- &dynamic_check, &noalign, false);
+ issetmem, issetmem && val_exp == const0_rtx,
+ dst_as, src_as, &dynamic_check, &noalign, false);
if (dump_file)
fprintf (dump_file, "Selected stringop expansion strategy: %s\n",
(clobber (reg:CC FLAGS_REG))])]
""
{
- /* Can't use this for non-default address spaces. */
- if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
- FAIL;
-
int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
/* If .md ever supports :P for Pmode, these can be directly
operands[5] = plus_constant (Pmode, operands[0], piece_size);
operands[6] = plus_constant (Pmode, operands[2], piece_size);
- /* Can't use this if the user has appropriated esi or edi. */
+ /* Can't use this if the user has appropriated esi or edi,
+ or if we have the destination in the non-default address space,
+ since string insns cannot override the destination segment. */
if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
- && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
+ && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
+ && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])))
{
emit_insn (gen_strmov_singleop (operands[0], operands[1],
operands[2], operands[3],
(const_int 8)))]
"TARGET_64BIT
&& !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsq"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^movsq\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "DI")])
(plus:P (match_dup 3)
(const_int 4)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movs{l|d}"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^movs{l|d}\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "SI")])
(plus:P (match_dup 3)
(const_int 2)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsw"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^movsw\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "HI")])
(plus:P (match_dup 3)
(const_int 1)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsb"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^movsb\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set (attr "prefix_rex")
(use (match_dup 5))]
"TARGET_64BIT
&& !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movsq"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 3))))"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^rep{%;} movsq\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
(mem:BLK (match_dup 4)))
(use (match_dup 5))]
"!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movs{l|d}"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 3))))"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^rep{%;} movs{l|d}\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
(mem:BLK (match_dup 4)))
(use (match_dup 5))]
"!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movsb"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 3))))"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^rep{%;} movsb\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
(unspec [(const_int 0)] UNSPEC_STOS)]
"TARGET_64BIT
&& !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosq"
[(set_attr "type" "str")
(set_attr "memory" "store")
(const_int 4)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stos{l|d}"
[(set_attr "type" "str")
(set_attr "memory" "store")
(const_int 2)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosw"
[(set_attr "type" "str")
(set_attr "memory" "store")
(const_int 1)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosb"
[(set_attr "type" "str")
(set_attr "memory" "store")
(use (match_dup 4))]
"TARGET_64BIT
&& !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stosq"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(use (match_operand:SI 2 "register_operand" "a"))
(use (match_dup 4))]
"!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stos{l|d}"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(use (match_operand:QI 2 "register_operand" "a"))
(use (match_dup 4))]
"!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stosb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-sse -mtune=generic -masm=att" } */
+
+typedef unsigned long uword __attribute__ ((mode (word)));
+
+struct a { uword arr[30]; };
+/* M is placed in the __seg_gs address space, while BAR stores through a
+   plain pointer, so only the source of the structure copy is in a
+   non-default address space.  The scan below requires a rep movsl/movsq
+   whose first (AT&T source) operand carries a %gs: override.  */
+__seg_gs struct a m;
+void bar (struct a *dst) { *dst = m; }
+
+/* { dg-final { scan-assembler "rep\[; \t\]+movs(l|q)\[ \t\]+%gs:" } } */