{
int i;
unsigned int ix86_arch_mask, ix86_tune_mask;
- const bool ix86_tune_specified = (ix86_tune_string != NULL);
+ const bool ix86_tune_specified = (ix86_tune_string != NULL);
const char *prefix;
const char *suffix;
const char *sw;
{"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
- | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
| PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
{"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
0 /* flags are only used for -march switch. */ },
name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
name = targetm.strip_name_encoding (name);
-
+
/* If we're using one_only, then there needs to be a .gnu.linkonce
prefix to the section name. */
linkonce = one_only ? ".gnu.linkonce" : "";
-
+
string = ACONCAT ((linkonce, prefix, ".", name, NULL));
-
+
DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
return;
}
static bool warnedavx;
if (cum
- && !warnedavx
+ && !warnedavx
&& cum->warn_avx)
{
warnedavx = true;
== NULL_TREE))
{
static bool warned;
-
+
if (!warned && warn_psabi)
{
warned = true;
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
if (mode != BLKmode)
- return gen_reg_or_parallel (mode, orig_mode,
+ return gen_reg_or_parallel (mode, orig_mode,
SSE_REGNO (sse_regno));
break;
case X86_64_X87_CLASS:
return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
const enum machine_mode mode = type_natural_mode (type, NULL);
-
+
if (TARGET_64BIT)
{
if (ix86_function_type_abi (fntype) == MS_ABI)
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
else
- return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+ return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
case MODE_TI:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
  /* MS ABI seems to require stack alignment to be always 16 except for function
- prologues. */
- if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
+     prologues and leaf functions. */
+ if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
+ && (!current_function_is_leaf || cfun->calls_alloca != 0
+ || ix86_current_function_calls_tls_descriptor))
{
preferred_alignment = 16;
stack_alignment_needed = 16;
frame->padding0 = ((offset + 16 - 1) & -16) - offset;
else
frame->padding0 = 0;
-
+
/* SSE register save area. */
offset += frame->padding0 + frame->nsseregs * 16;
gcc_assert (ix86_cfa_state->reg == src);
ix86_cfa_state->offset += INTVAL (offset);
ix86_cfa_state->reg = dest;
-
+
r = gen_rtx_PLUS (Pmode, src, offset);
r = gen_rtx_SET (VOIDmode, dest, r);
add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
   Return: the regno of the chosen register. */
-static unsigned int
+static unsigned int
find_drap_reg (void)
{
tree decl = cfun->decl;
register in such case. */
if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
return DI_REG;
-
+
/* Reuse static chain register if it isn't used for parameter
passing. */
if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
if (ix86_user_incoming_stack_boundary)
incoming_stack_boundary = ix86_user_incoming_stack_boundary;
/* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
- if -mstackrealign is used, it isn't used for sibcall check and
+     if -mstackrealign is used; it isn't used for the sibcall check, and the
     estimated stack alignment is 128 bits. */
else if (!sibcall
&& !TARGET_64BIT
drap_vreg = copy_to_reg (arg_ptr);
seq = get_insns ();
end_sequence ();
-
+
insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
if (!optimize)
{
/* Finalize the stack_realign_needed flag, which will guide the prologue/epilogue
   to be generated in the correct form. */
-static void
+static void
ix86_finalize_stack_realign_flags (void)
{
- /* Check if stack realign is really needed after reload, and
+  /* Check if stack realignment is really needed after reload, and
     store the result in cfun. */
unsigned int incoming_stack_boundary
= (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
}
insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
- RTX_FRAME_RELATED_P (insn) = 1;
+ RTX_FRAME_RELATED_P (insn) = 1;
ix86_cfa_state->reg = crtl->drap_reg;
/* Align the stack. */
if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
&& (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
ix86_emit_save_regs_using_mov ((frame_pointer_needed
- && !crtl->stack_realign_needed)
+ && !crtl->stack_realign_needed)
? hard_frame_pointer_rtx
: stack_pointer_rtx,
-frame.nregs * UNITS_PER_WORD);
ix86_cfa_state->reg = stack_pointer_rtx;
ix86_cfa_state->offset -= UNITS_PER_WORD;
- add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
RTX_FRAME_RELATED_P (insn) = 1;
ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
/* See the comment about red zone and frame
pointer usage in ix86_expand_prologue. */
if (frame_pointer_needed && frame.red_zone_size)
- emit_insn (gen_memory_blockage ());
+ emit_insn (gen_memory_blockage ());
using_drap = crtl->drap_reg && crtl->stack_realign_needed;
gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
locations. If both are available, default to ebp, since offsets
     are known to be small. The only exception is esp pointing directly
     to the end of the block of saved registers, where we may simplify the
- addressing mode.
+ addressing mode.
     If we are realigning the stack with both bp and sp, the register
     restores can't be addressed by bp; sp must be used instead. */
if (!frame_pointer_needed
- || (sp_valid && !(frame.to_allocate + frame.padding0))
+ || (sp_valid && !(frame.to_allocate + frame.padding0))
|| stack_realign_fp)
{
ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
     If we realign the stack with the frame pointer, then the stack pointer
     can't be recovered via lea $offset(%bp), %sp, because
- there is a padding area between bp and sp for realign.
+     there is a padding area between bp and sp for realignment.
"add $to_allocate, %sp" must be used instead. */
if (!sp_valid)
{
ix86_emit_leave (red_offset);
else
{
- /* For stack realigned really happens, recover stack
- pointer to hard frame pointer is a must, if not using
+	  /* If stack realignment really happens, restoring the stack
+	     pointer from the hard frame pointer is a must, if not using
leave. */
if (stack_realign_fp)
pro_epilogue_adjust_stack (stack_pointer_rtx,
gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
ix86_cfa_state->offset += UNITS_PER_WORD;
-
+
r = gen_rtx_REG (Pmode, CX_REG);
insn = emit_insn (ix86_gen_pop1 (r));
output_operand_lossage
("invalid operand size for operand code '%c'", code);
return;
-
+
case 'd':
case 'b':
case 'w':
op1 = gen_lowpart (mode, op1);
switch (mode)
- {
+ {
case V4SFmode:
emit_insn (gen_avx_movups (op0, op1));
break;
if (MEM_P (op1))
{
/* If we're optimizing for size, movups is the smallest. */
- if (optimize_insn_for_size_p ()
+ if (optimize_insn_for_size_p ()
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
{
op0 = gen_lowpart (V2DFmode, op0);
op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_movupd (op0, op1));
}
else
{
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_movups (op0, op1));
}
else
{
#define LEA_SEARCH_THRESHOLD 12
/* Search backward for non-agu definition of register number REGNO1
- or register number REGNO2 in INSN's basic block until
+ or register number REGNO2 in INSN's basic block until
1. Pass LEA_SEARCH_THRESHOLD instructions, or
2. Reach BB boundary, or
3. Reach agu definition.
prev = PREV_INSN (prev);
}
}
-
+
if (distance < LEA_SEARCH_THRESHOLD)
{
edge e;
edge_iterator ei;
bool simple_loop = false;
-
+
FOR_EACH_EDGE (e, ei, bb->preds)
if (e->src == bb)
{
simple_loop = true;
break;
}
-
+
if (simple_loop)
{
rtx prev = BB_END (bb);
return distance;
}
-/* Return the distance between INSN and the next insn that uses
+/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address. Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
edge e;
edge_iterator ei;
bool simple_loop = false;
-
+
FOR_EACH_EDGE (e, ei, bb->succs)
if (e->dest == bb)
{
simple_loop = true;
break;
}
-
+
if (simple_loop)
{
rtx next = BB_HEAD (bb);
next = NEXT_INSN (next);
}
}
- }
+ }
return -1;
}
/* If a = b + c, (a!=b && a!=c), must use lea form. */
if (regno0 != regno1 && regno0 != regno2)
return true;
- else
+ else
{
int dist_define, dist_use;
dist_define = distance_non_agu_define (regno1, regno2, insn);
break;
}
- if (shift_rtx
+ if (shift_rtx
&& (GET_CODE (shift_rtx) == ASHIFT
|| GET_CODE (shift_rtx) == LSHIFTRT
|| GET_CODE (shift_rtx) == ASHIFTRT
}
-/* Return a comparison we can do and that it is equivalent to
+/* Return a comparison we can do and that is equivalent to
swap_condition (code) apart possibly from orderedness.
But, never change orderedness if TARGET_IEEE_FP, returning
UNKNOWN in that case if necessary. */
&& alg != rep_prefix_4_byte \
&& alg != rep_prefix_8_byte))
const struct processor_costs *cost;
-
+
/* Even if the string operation call is cold, we still might spend a lot
of time processing large blocks. */
if (optimize_function_for_size_p (cfun)
}
if (ix86_cmodel == CM_LARGE_PIC
- && MEM_P (fnaddr)
+ && MEM_P (fnaddr)
&& GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
&& !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
- FNDECL is the decl of the target address; M_TRAMP is a MEM for
+ FNDECL is the decl of the target address; M_TRAMP is a MEM for
the trampoline, and CHAIN_VALUE is an RTX for the static chain
to be passed to the target function. */
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
-
+
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
-
+
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
case V8HI_FTYPE_V8HI_V8HI_INT:
case V8SI_FTYPE_V8SI_V8SI_INT:
case V8SI_FTYPE_V8SI_V4SI_INT:
- case V8SF_FTYPE_V8SF_V8SF_INT:
- case V8SF_FTYPE_V8SF_V4SF_INT:
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ case V8SF_FTYPE_V8SF_V4SF_INT:
case V4SI_FTYPE_V4SI_V4SI_INT:
case V4DF_FTYPE_V4DF_V4DF_INT:
case V4DF_FTYPE_V4DF_V2DF_INT:
emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
var = force_reg (GET_MODE_INNER (mode), var);
ix86_expand_vector_set (mmx_ok, target, var, one_var);
- return true;
+ return true;
}
switch (mode)
rtx (*gen_load_even) (rtx, rtx, rtx);
rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
-
+
switch (mode)
{
case V8HImode:
default:
gcc_unreachable ();
}
-
+
for (i = 0; i < n; i++)
{
      /* Extend the odd element to SImode using a paradoxical SUBREG. */
  /* Cast the V4SImode vector back to a vector in the original mode. */
op0 = gen_reg_rtx (mode);
emit_move_insn (op0, gen_lowpart (mode, op1));
-
+
    /* Load even elements into the second position. */
emit_insn ((*gen_load_even) (op0,
force_reg (inner_mode,
break;
/* Don't use ix86_expand_vector_init_interleave if we can't
- move from GPR to SSE register directly. */
+ move from GPR to SSE register directly. */
if (!TARGET_INTER_UNIT_MOVES)
break;
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
-
+
/* Generate two permutation masks. If the required element is within
   the given vector, it is shuffled into the proper lane. If the required
element is in the other vector, force a zero into the lane by setting
d.op1 = d.op0;
break;
}
-
+
d.target = gen_reg_rtx (d.vmode);
if (ix86_expand_vec_perm_builtin_1 (&d))
return d.target;
an error generated from the extract. */
gcc_assert (vec_mask > 0 && vec_mask <= 3);
one_vec = (vec_mask != 3);
-
+
/* Implementable with shufps or pshufd. */
if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
return true;