1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
/* Default stack-probe limit: -1 means "no limit known", letting the
   target header override it.  The original extraction lost the
   matching #endif, leaving the conditional unterminated.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* NOTE(review): garbled extraction -- each line below carries a fused
   upstream line number; some initializer fields (the gap between fused
   lines 58 and 61) and the closing "};" are missing.  Tokens are left
   byte-identical; restore this table from upstream gcc/config/i386/i386.c
   before compiling.  */
50 /* Processor costs (relative to an add) */
51 struct processor_costs size_cost
= { /* costs for tuning for size */
52 2, /* cost of an add instruction */
53 3, /* cost of a lea instruction */
54 2, /* variable shift costs */
55 3, /* constant shift costs */
56 3, /* cost of starting a multiply */
57 0, /* cost of multiply per each bit set */
58 3, /* cost of a divide/mod */
61 2, /* cost for loading QImode using movzbl */
62 {2, 2, 2}, /* cost of loading integer registers
63 in QImode, HImode and SImode.
64 Relative to reg-reg move (2). */
65 {2, 2, 2}, /* cost of storing integer registers */
66 2, /* cost of reg,reg fld/fst */
67 {2, 2, 2}, /* cost of loading fp registers
68 in SFmode, DFmode and XFmode */
69 {2, 2, 2}, /* cost of storing fp registers (was mislabeled "loading
integer registers"; this is the store half of the fp pair) */
70 3, /* cost of moving MMX register */
71 {3, 3}, /* cost of loading MMX registers
72 in SImode and DImode */
73 {3, 3}, /* cost of storing MMX registers
74 in SImode and DImode */
75 3, /* cost of moving SSE register */
76 {3, 3, 3}, /* cost of loading SSE registers
77 in SImode, DImode and TImode */
78 {3, 3, 3}, /* cost of storing SSE registers
79 in SImode, DImode and TImode */
80 3, /* MMX or SSE register to integer */
/* NOTE(review): garbled extraction -- fused upstream line numbers; the
   field between fused lines 91 and 93 and the closing "};" are missing.
   Tokens left byte-identical; restore from upstream i386.c.  */
82 /* Processor costs (relative to an add) */
83 struct processor_costs i386_cost
= { /* 386 specific costs */
84 1, /* cost of an add instruction */
85 1, /* cost of a lea instruction */
86 3, /* variable shift costs */
87 2, /* constant shift costs */
88 6, /* cost of starting a multiply */
89 1, /* cost of multiply per each bit set */
90 23, /* cost of a divide/mod */
91 15, /* "large" insn */
93 4, /* cost for loading QImode using movzbl */
94 {2, 4, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 4, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {8, 8, 8}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {8, 8, 8}, /* cost of storing fp registers (was mislabeled
"loading integer registers") */
102 2, /* cost of moving MMX register */
103 {4, 8}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {4, 8}, /* cost of storing MMX registers
106 in SImode and DImode */
107 2, /* cost of moving SSE register */
108 {4, 8, 16}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {4, 8, 16}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3, /* MMX or SSE register to integer */
/* NOTE(review): garbled extraction -- fused upstream line numbers; the
   field between fused lines 123 and 125 and the closing "};" are missing.
   Tokens left byte-identical; restore from upstream i386.c.  */
115 struct processor_costs i486_cost
= { /* 486 specific costs */
116 1, /* cost of an add instruction */
117 1, /* cost of a lea instruction */
118 3, /* variable shift costs */
119 2, /* constant shift costs */
120 12, /* cost of starting a multiply */
121 1, /* cost of multiply per each bit set */
122 40, /* cost of a divide/mod */
123 15, /* "large" insn */
125 4, /* cost for loading QImode using movzbl */
126 {2, 4, 2}, /* cost of loading integer registers
127 in QImode, HImode and SImode.
128 Relative to reg-reg move (2). */
129 {2, 4, 2}, /* cost of storing integer registers */
130 2, /* cost of reg,reg fld/fst */
131 {8, 8, 8}, /* cost of loading fp registers
132 in SFmode, DFmode and XFmode */
133 {8, 8, 8}, /* cost of storing fp registers (was mislabeled
"loading integer registers") */
134 2, /* cost of moving MMX register */
135 {4, 8}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {4, 8}, /* cost of storing MMX registers
138 in SImode and DImode */
139 2, /* cost of moving SSE register */
140 {4, 8, 16}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {4, 8, 16}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3 /* MMX or SSE register to integer */
/* NOTE(review): garbled extraction -- fused upstream line numbers; the
   field between fused lines 155 and 157 and the closing "};" are missing.
   Tokens left byte-identical; restore from upstream i386.c.  */
147 struct processor_costs pentium_cost
= {
148 1, /* cost of an add instruction */
149 1, /* cost of a lea instruction */
150 4, /* variable shift costs */
151 1, /* constant shift costs */
152 11, /* cost of starting a multiply */
153 0, /* cost of multiply per each bit set */
154 25, /* cost of a divide/mod */
155 8, /* "large" insn */
157 6, /* cost for loading QImode using movzbl */
158 {2, 4, 2}, /* cost of loading integer registers
159 in QImode, HImode and SImode.
160 Relative to reg-reg move (2). */
161 {2, 4, 2}, /* cost of storing integer registers */
162 2, /* cost of reg,reg fld/fst */
163 {2, 2, 6}, /* cost of loading fp registers
164 in SFmode, DFmode and XFmode */
165 {4, 4, 6}, /* cost of storing fp registers (was mislabeled
"loading integer registers") */
166 8, /* cost of moving MMX register */
167 {8, 8}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {8, 8}, /* cost of storing MMX registers
170 in SImode and DImode */
171 2, /* cost of moving SSE register */
172 {4, 8, 16}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {4, 8, 16}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3 /* MMX or SSE register to integer */
/* NOTE(review): garbled extraction -- fused upstream line numbers; the
   field between fused lines 187 and 189 and the closing "};" are missing.
   Tokens left byte-identical; restore from upstream i386.c.  */
179 struct processor_costs pentiumpro_cost
= {
180 1, /* cost of an add instruction */
181 1, /* cost of a lea instruction */
182 1, /* variable shift costs */
183 1, /* constant shift costs */
184 4, /* cost of starting a multiply */
185 0, /* cost of multiply per each bit set */
186 17, /* cost of a divide/mod */
187 8, /* "large" insn */
189 2, /* cost for loading QImode using movzbl */
190 {4, 4, 4}, /* cost of loading integer registers
191 in QImode, HImode and SImode.
192 Relative to reg-reg move (2). */
193 {2, 2, 2}, /* cost of storing integer registers */
194 2, /* cost of reg,reg fld/fst */
195 {2, 2, 6}, /* cost of loading fp registers
196 in SFmode, DFmode and XFmode */
197 {4, 4, 6}, /* cost of storing fp registers (was mislabeled
"loading integer registers") */
198 2, /* cost of moving MMX register */
199 {2, 2}, /* cost of loading MMX registers
200 in SImode and DImode */
201 {2, 2}, /* cost of storing MMX registers
202 in SImode and DImode */
203 2, /* cost of moving SSE register */
204 {2, 2, 8}, /* cost of loading SSE registers
205 in SImode, DImode and TImode */
206 {2, 2, 8}, /* cost of storing SSE registers
207 in SImode, DImode and TImode */
208 3 /* MMX or SSE register to integer */
/* NOTE(review): garbled extraction -- fused upstream line numbers; the
   field between fused lines 219 and 221 and the closing "};" are missing.
   Tokens left byte-identical; restore from upstream i386.c.  */
211 struct processor_costs k6_cost
= {
212 1, /* cost of an add instruction */
213 2, /* cost of a lea instruction */
214 1, /* variable shift costs */
215 1, /* constant shift costs */
216 3, /* cost of starting a multiply */
217 0, /* cost of multiply per each bit set */
218 18, /* cost of a divide/mod */
219 8, /* "large" insn */
221 3, /* cost for loading QImode using movzbl */
222 {4, 5, 4}, /* cost of loading integer registers
223 in QImode, HImode and SImode.
224 Relative to reg-reg move (2). */
225 {2, 3, 2}, /* cost of storing integer registers */
226 4, /* cost of reg,reg fld/fst */
227 {6, 6, 6}, /* cost of loading fp registers
228 in SFmode, DFmode and XFmode */
229 {4, 4, 4}, /* cost of storing fp registers (was mislabeled
"loading integer registers") */
230 2, /* cost of moving MMX register */
231 {2, 2}, /* cost of loading MMX registers
232 in SImode and DImode */
233 {2, 2}, /* cost of storing MMX registers
234 in SImode and DImode */
235 2, /* cost of moving SSE register */
236 {2, 2, 8}, /* cost of loading SSE registers
237 in SImode, DImode and TImode */
238 {2, 2, 8}, /* cost of storing SSE registers
239 in SImode, DImode and TImode */
240 6 /* MMX or SSE register to integer */
/* NOTE(review): garbled extraction -- fused upstream line numbers; the
   field between fused lines 251 and 253 and the closing "};" are missing.
   Tokens left byte-identical; restore from upstream i386.c.  */
243 struct processor_costs athlon_cost
= {
244 1, /* cost of an add instruction */
245 2, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 5, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 42, /* cost of a divide/mod */
251 8, /* "large" insn */
253 4, /* cost for loading QImode using movzbl */
254 {4, 5, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 3, 2}, /* cost of storing integer registers */
258 4, /* cost of reg,reg fld/fst */
259 {6, 6, 20}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 16}, /* cost of storing fp registers (was mislabeled
"loading integer registers") */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 6 /* MMX or SSE register to integer */
/* NOTE(review): garbled extraction -- fused upstream line numbers; the
   field between fused lines 283 and 285 and the closing "};" are missing.
   Tokens left byte-identical; restore from upstream i386.c.  */
275 struct processor_costs pentium4_cost
= {
276 1, /* cost of an add instruction */
277 1, /* cost of a lea instruction */
278 8, /* variable shift costs */
279 8, /* constant shift costs */
280 30, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 112, /* cost of a divide/mod */
283 16, /* "large" insn */
285 2, /* cost for loading QImode using movzbl */
286 {4, 5, 4}, /* cost of loading integer registers
287 in QImode, HImode and SImode.
288 Relative to reg-reg move (2). */
289 {2, 3, 2}, /* cost of storing integer registers */
290 2, /* cost of reg,reg fld/fst */
291 {2, 2, 6}, /* cost of loading fp registers
292 in SFmode, DFmode and XFmode */
293 {4, 4, 6}, /* cost of storing fp registers (was mislabeled
"loading integer registers") */
294 2, /* cost of moving MMX register */
295 {2, 2}, /* cost of loading MMX registers
296 in SImode and DImode */
297 {2, 2}, /* cost of storing MMX registers
298 in SImode and DImode */
299 12, /* cost of moving SSE register */
300 {12, 12, 12}, /* cost of loading SSE registers
301 in SImode, DImode and TImode */
302 {2, 2, 8}, /* cost of storing SSE registers
303 in SImode, DImode and TImode */
304 10, /* MMX or SSE register to integer */
307 struct processor_costs
*ix86_cost
= &pentium_cost
;
309 /* Processor feature/optimization bitmasks. */
310 #define m_386 (1<<PROCESSOR_I386)
311 #define m_486 (1<<PROCESSOR_I486)
312 #define m_PENT (1<<PROCESSOR_PENTIUM)
313 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
314 #define m_K6 (1<<PROCESSOR_K6)
315 #define m_ATHLON (1<<PROCESSOR_ATHLON)
316 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
318 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
319 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
320 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
321 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
322 const int x86_double_with_add
= ~m_386
;
323 const int x86_use_bit_test
= m_386
;
324 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
325 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
326 const int x86_3dnow_a
= m_ATHLON
;
327 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
328 const int x86_branch_hints
= m_PENT4
;
329 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
330 const int x86_partial_reg_stall
= m_PPRO
;
331 const int x86_use_loop
= m_K6
;
332 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
333 const int x86_use_mov0
= m_K6
;
334 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
335 const int x86_read_modify_write
= ~m_PENT
;
336 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
337 const int x86_split_long_moves
= m_PPRO
;
338 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
;
339 const int x86_single_stringop
= m_386
| m_PENT4
;
340 const int x86_qimode_math
= ~(0);
341 const int x86_promote_qi_regs
= 0;
342 const int x86_himode_math
= ~(m_PPRO
);
343 const int x86_promote_hi_regs
= m_PPRO
;
344 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
345 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
346 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
347 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
348 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
);
349 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
350 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
351 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
352 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
353 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
/* NOTE(review): both comments in this block were left unterminated by
   the extraction; closing sentences restored -- confirm wording against
   upstream i386.c.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
363 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
365 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
; /* names for 16 bit regs */
366 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
; /* names for 8 bit regs (low) */
367 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
; /* names for 8 bit regs (high) */
/* NOTE(review): garbled extraction -- fused upstream line numbers; the
   opening "{", the arg-pointer entry (gap between fused lines 380 and
   383), the trailing rows and the closing "};" are missing.  Tokens left
   byte-identical; restore the full map from upstream i386.c.
   Row comments below follow the gcc register numbering documented in the
   DWARF comment later in this file (0=ax, 1=dx, 2=cx, 3=bx, 4=si, 5=di,
   6=bp, 7=sp).  */
369 /* Array of the smallest class containing reg number REGNO, indexed by
370 REGNO. Used by REGNO_REG_CLASS in i386.h. */
372 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
/* ax, dx, cx, bx */
375 AREG
, DREG
, CREG
, BREG
,
/* si, di, bp, sp */
377 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* FP stack registers */
379 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
380 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
383 /* flags, fpsr, dirflag, frame */
384 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
/* SSE registers */
385 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
/* MMX registers */
387 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* extended (REX) integer registers */
389 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
390 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
/* extended SSE registers */
391 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
395 /* The "default" register map used in 32bit mode. */
397 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
399 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
400 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
401 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
402 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
403 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
404 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
405 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
408 static int x86_64_int_parameter_registers
[6] = {5 /*RDI*/, 4 /*RSI*/,
409 1 /*RDX*/, 2 /*RCX*/,
410 FIRST_REX_INT_REG
/*R8 */,
411 FIRST_REX_INT_REG
+ 1 /*R9 */};
/* gcc register numbers usable for returning integer values.
   NOTE(review): the original comment labeled gcc regno 1 as RDI, but per
   the DWARF-numbering comment later in this file regno 1 is %edx/RDX and
   regno 5/%edi, regno 4/%esi; comments corrected accordingly.  */
412 static int x86_64_int_return_registers
[4] = {0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/};
414 /* The "default" register map used in 64bit mode. */
415 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
417 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
418 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
419 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
420 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
421 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
422 8,9,10,11,12,13,14,15, /* extended integer registers */
423 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
426 /* Define the register numbers to be used in Dwarf debugging information.
427 The SVR4 reference port C compiler uses the following register numbers
428 in its Dwarf output code:
429 0 for %eax (gcc regno = 0)
430 1 for %ecx (gcc regno = 2)
431 2 for %edx (gcc regno = 1)
432 3 for %ebx (gcc regno = 3)
433 4 for %esp (gcc regno = 7)
434 5 for %ebp (gcc regno = 6)
435 6 for %esi (gcc regno = 4)
436 7 for %edi (gcc regno = 5)
437 The following three DWARF register numbers are never generated by
438 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
439 believes these numbers have these meanings.
440 8 for %eip (no gcc equivalent)
441 9 for %eflags (gcc regno = 17)
442 10 for %trapno (no gcc equivalent)
443 It is not at all clear how we should number the FP stack registers
444 for the x86 architecture. If the version of SDB on x86/svr4 were
445 a bit less brain dead with respect to floating-point then we would
446 have a precedent to follow with respect to DWARF register numbers
447 for x86 FP registers, but the SDB on x86/svr4 is so completely
448 broken with respect to FP registers that it is hardly worth thinking
449 of it as something to strive for compatibility with.
450 The version of x86/svr4 SDB I have at the moment does (partially)
451 seem to believe that DWARF register number 11 is associated with
452 the x86 register %st(0), but that's about all. Higher DWARF
453 register numbers don't seem to be associated with anything in
454 particular, and even for DWARF regno 11, SDB only seems to under-
455 stand that it should say that a variable lives in %st(0) (when
456 asked via an `=' command) if we said it was in DWARF regno 11,
457 but SDB still prints garbage when asked for the value of the
458 variable in question (via a `/' command).
459 (Also note that the labels SDB prints for various FP stack regs
460 when doing an `x' command are all wrong.)
461 Note that these problems generally don't affect the native SVR4
462 C compiler because it doesn't allow the use of -O with -g and
463 because when it is *not* optimizing, it allocates a memory
464 location for each floating-point variable, and the memory
465 location is what gets described in the DWARF AT_location
466 attribute for the variable in question.
467 Regardless of the severe mental illness of the x86/svr4 SDB, we
468 do something sensible here and we use the following DWARF
469 register numbers. Note that these are all stack-top-relative
471 11 for %st(0) (gcc regno = 8)
472 12 for %st(1) (gcc regno = 9)
473 13 for %st(2) (gcc regno = 10)
474 14 for %st(3) (gcc regno = 11)
475 15 for %st(4) (gcc regno = 12)
476 16 for %st(5) (gcc regno = 13)
477 17 for %st(6) (gcc regno = 14)
478 18 for %st(7) (gcc regno = 15)
480 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
482 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
483 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
484 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
485 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
486 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
487 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
488 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
491 /* Test and compare insns in i386.md store the information needed to
492 generate branch and scc insns here. */
494 struct rtx_def
*ix86_compare_op0
= NULL_RTX
;
495 struct rtx_def
*ix86_compare_op1
= NULL_RTX
;
/* Number of per-mode scratch stack slots cached per function.  */
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
501 /* Define the structure for the machine field in struct function. */
502 struct machine_function
504 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
505 int save_varrargs_registers
;
506 int accesses_prev_frame
;
/* Shorthand accessors for the current function's machine data.  */
#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* NOTE(review): garbled extraction -- the "struct ix86_frame {" opening,
   most of the stack-layout diagram, several fields and the closing "};"
   are missing, and the leading comment is unterminated (its "*''/" was on
   a lost line), so the fragment below does not parse as shown.  Tokens
   left byte-identical; restore the structure from upstream i386.c.  */
512 /* Structure describing stack frame layout.
513 Stack grows downward:
519 saved frame pointer if frame_pointer_needed
520 <- HARD_FRAME_POINTER
526 > to_allocate <- FRAME_POINTER
538 int outgoing_arguments_size
;
541 HOST_WIDE_INT to_allocate
;
542 /* The offsets relative to ARG_POINTER. */
543 HOST_WIDE_INT frame_pointer_offset
;
544 HOST_WIDE_INT hard_frame_pointer_offset
;
545 HOST_WIDE_INT stack_pointer_offset
;
548 /* Code model option as passed by user. */
549 const char *ix86_cmodel_string
;
551 enum cmodel ix86_cmodel
;
553 /* which cpu are we scheduling for */
554 enum processor_type ix86_cpu
;
556 /* which instruction set architecture to use. */
559 /* Strings to hold which cpu and instruction set architecture to use. */
560 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
561 const char *ix86_arch_string
; /* for -march=<xxx> */
563 /* # of registers to use to pass arguments. */
564 const char *ix86_regparm_string
;
566 /* ix86_regparm_string as a number */
569 /* Alignment to use for loops and jumps: */
571 /* Power of two alignment for loops. */
572 const char *ix86_align_loops_string
;
574 /* Power of two alignment for non-loop jumps. */
575 const char *ix86_align_jumps_string
;
577 /* Power of two alignment for stack boundary in bytes. */
578 const char *ix86_preferred_stack_boundary_string
;
580 /* Preferred alignment for stack boundary in bits. */
581 int ix86_preferred_stack_boundary
;
583 /* Values 1-5: see jump.c */
584 int ix86_branch_cost
;
585 const char *ix86_branch_cost_string
;
587 /* Power of two alignment for functions. */
588 const char *ix86_align_funcs_string
;
590 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
591 static char internal_label_prefix
[16];
592 static int internal_label_prefix_len
;
594 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
595 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
596 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
598 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
599 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
601 static rtx gen_push
PARAMS ((rtx
));
602 static int memory_address_length
PARAMS ((rtx addr
));
603 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
604 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
605 static int ix86_safe_length
PARAMS ((rtx
));
606 static enum attr_memory ix86_safe_memory
PARAMS ((rtx
));
607 static enum attr_pent_pair ix86_safe_pent_pair
PARAMS ((rtx
));
608 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
609 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
610 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
611 static rtx
* ix86_pent_find_pair
PARAMS ((rtx
*, rtx
*, enum attr_pent_pair
,
613 static void ix86_init_machine_status
PARAMS ((struct function
*));
614 static void ix86_mark_machine_status
PARAMS ((struct function
*));
615 static void ix86_free_machine_status
PARAMS ((struct function
*));
616 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
617 static int ix86_safe_length_prefix
PARAMS ((rtx
));
618 static int ix86_nsaved_regs
PARAMS((void));
619 static void ix86_emit_save_regs
PARAMS((void));
620 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
621 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
622 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
623 static void ix86_sched_reorder_pentium
PARAMS((rtx
*, rtx
*));
624 static void ix86_sched_reorder_ppro
PARAMS((rtx
*, rtx
*));
625 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
626 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
627 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
628 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
629 static int ix86_issue_rate
PARAMS ((void));
630 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
631 static void ix86_sched_init
PARAMS ((FILE *, int, int));
632 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
633 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
637 rtx base
, index
, disp
;
641 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
643 struct builtin_description
;
644 static rtx ix86_expand_sse_comi
PARAMS ((struct builtin_description
*, tree
,
646 static rtx ix86_expand_sse_compare
PARAMS ((struct builtin_description
*, tree
,
648 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
649 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
650 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
651 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
, int));
652 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
653 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
654 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
658 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
660 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
661 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
662 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
663 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
664 static int ix86_save_reg
PARAMS ((int, int));
665 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
666 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
667 const struct attribute_spec ix86_attribute_table
[];
668 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
669 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
671 #ifdef DO_GLOBAL_CTORS_BODY
672 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
674 #if defined(TARGET_ELF) && defined(TARGET_COFF)
675 static void sco_asm_named_section
PARAMS ((const char *, unsigned int));
676 static void sco_asm_out_constructor
PARAMS ((rtx
, int));
/* NOTE(review): garbled extraction -- the comment's closing "*''/", the
   enum's opening brace, most of its members (only the first two are
   visible) and the closing "};" are missing.  Tokens left byte-identical;
   restore the enum from upstream i386.c.  */
678 /* Register class used for passing given 64bit part of the argument.
679 These represent classes as documented by the PS ABI, with the exception
680 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
681 use SF or DFmode move instead of DImode to avoid reformatting penalties.
683 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
684 whenever possible (upper half does contain padding).
686 enum x86_64_reg_class
689 X86_64_INTEGER_CLASS
,
690 X86_64_INTEGERSI_CLASS
,
/* Human-readable names for the x86_64_reg_class values, used for
   debugging dumps.  */
const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF",
  "sseDF", "sseup", "x87", "x87up", "no"
};
702 #define MAX_CLASSES 4
703 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
704 enum x86_64_reg_class
[MAX_CLASSES
],
706 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
708 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
710 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
711 enum x86_64_reg_class
));
/* Initialize the GCC target structure.  The extraction lost the #endif
   terminating both conditional sections below; they are restored so the
   preprocessor conditionals balance.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table

#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
/* NOTE(review): the prototype's second parameter was on a lost line;
   restored as HOST_WIDE_INT (function-prologue hook signature) --
   confirm against upstream i386.c.  */
static void ix86_osf_output_function_prologue PARAMS ((FILE *,
						       HOST_WIDE_INT));
# undef TARGET_ASM_FUNCTION_PROLOGUE
# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
753 struct gcc_target targetm
= TARGET_INITIALIZER
;
755 /* Sometimes certain combinations of command options do not make
756 sense on a particular target machine. You can define a macro
757 `OVERRIDE_OPTIONS' to take account of this. This macro, if
758 defined, is executed once just after all the command options have
761 Don't use this macro to turn on various extra optimizations for
762 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
/* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
773 struct processor_costs
*cost
; /* Processor costs */
774 int target_enable
; /* Target flags to enable. */
775 int target_disable
; /* Target flags to disable. */
776 int align_loop
; /* Default alignments. */
781 const processor_target_table
[PROCESSOR_max
] =
783 {&i386_cost
, 0, 0, 2, 2, 2, 1},
784 {&i486_cost
, 0, 0, 4, 4, 4, 1},
785 {&pentium_cost
, 0, 0, -4, -4, -4, 1},
786 {&pentiumpro_cost
, 0, 0, 4, -4, 4, 1},
787 {&k6_cost
, 0, 0, -5, -5, 4, 1},
788 {&athlon_cost
, 0, 0, 4, -4, 4, 1},
789 {&pentium4_cost
, 0, 0, 2, 2, 2, 1}
794 const char *name
; /* processor name or nickname. */
795 enum processor_type processor
;
797 const processor_alias_table
[] =
799 {"i386", PROCESSOR_I386
},
800 {"i486", PROCESSOR_I486
},
801 {"i586", PROCESSOR_PENTIUM
},
802 {"pentium", PROCESSOR_PENTIUM
},
803 {"i686", PROCESSOR_PENTIUMPRO
},
804 {"pentiumpro", PROCESSOR_PENTIUMPRO
},
805 {"k6", PROCESSOR_K6
},
806 {"athlon", PROCESSOR_ATHLON
},
807 {"pentium4", PROCESSOR_PENTIUM4
},
810 int const pta_size
= sizeof (processor_alias_table
) / sizeof (struct pta
);
812 #ifdef SUBTARGET_OVERRIDE_OPTIONS
813 SUBTARGET_OVERRIDE_OPTIONS
;
816 ix86_arch
= PROCESSOR_I386
;
817 ix86_cpu
= (enum processor_type
) TARGET_CPU_DEFAULT
;
819 if (ix86_cmodel_string
!= 0)
821 if (!strcmp (ix86_cmodel_string
, "small"))
822 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
824 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string
);
825 else if (!strcmp (ix86_cmodel_string
, "32"))
827 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
828 ix86_cmodel
= CM_KERNEL
;
829 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
830 ix86_cmodel
= CM_MEDIUM
;
831 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
832 ix86_cmodel
= CM_LARGE
;
834 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
840 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
842 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
843 error ("Code model `%s' not supported in the %s bit mode.",
844 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
845 if (ix86_cmodel
== CM_LARGE
)
846 sorry ("Code model `large' not supported yet.");
847 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
848 sorry ("%i-bit mode not compiled in.",
849 (target_flags
& MASK_64BIT
) ? 64 : 32);
851 if (ix86_arch_string
!= 0)
853 for (i
= 0; i
< pta_size
; i
++)
854 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
856 ix86_arch
= processor_alias_table
[i
].processor
;
857 /* Default cpu tuning to the architecture. */
858 ix86_cpu
= ix86_arch
;
863 error ("bad value (%s) for -march= switch", ix86_arch_string
);
866 if (ix86_cpu_string
!= 0)
868 for (i
= 0; i
< pta_size
; i
++)
869 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
871 ix86_cpu
= processor_alias_table
[i
].processor
;
875 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
879 ix86_cost
= &size_cost
;
881 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
882 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
883 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
885 /* Arrange to set up i386_stack_locals for all functions. */
886 init_machine_status
= ix86_init_machine_status
;
887 mark_machine_status
= ix86_mark_machine_status
;
888 free_machine_status
= ix86_free_machine_status
;
890 /* Validate -mregparm= value. */
891 if (ix86_regparm_string
)
893 i
= atoi (ix86_regparm_string
);
894 if (i
< 0 || i
> REGPARM_MAX
)
895 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
901 ix86_regparm
= REGPARM_MAX
;
903 /* If the user has provided any of the -malign-* options,
904 warn and use that value only if -falign-* is not set.
905 Remove this code in GCC 3.2 or later. */
906 if (ix86_align_loops_string
)
908 warning ("-malign-loops is obsolete, use -falign-loops");
909 if (align_loops
== 0)
911 i
= atoi (ix86_align_loops_string
);
912 if (i
< 0 || i
> MAX_CODE_ALIGN
)
913 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
915 align_loops
= 1 << i
;
919 if (ix86_align_jumps_string
)
921 warning ("-malign-jumps is obsolete, use -falign-jumps");
922 if (align_jumps
== 0)
924 i
= atoi (ix86_align_jumps_string
);
925 if (i
< 0 || i
> MAX_CODE_ALIGN
)
926 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
928 align_jumps
= 1 << i
;
932 if (ix86_align_funcs_string
)
934 warning ("-malign-functions is obsolete, use -falign-functions");
935 if (align_functions
== 0)
937 i
= atoi (ix86_align_funcs_string
);
938 if (i
< 0 || i
> MAX_CODE_ALIGN
)
939 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
941 align_functions
= 1 << i
;
945 /* Default align_* from the processor table. */
/* Absolute value, used for the align_* defaults below.  The argument and
   the whole expansion are parenthesized so that expressions such as
   abs (a - b) expand correctly.  NOTE: the argument is still evaluated
   twice, so it must be side-effect free.  */
#define abs(n) ((n) < 0 ? -(n) : (n))
947 if (align_loops
== 0)
948 align_loops
= 1 << abs (processor_target_table
[ix86_cpu
].align_loop
);
949 if (align_jumps
== 0)
950 align_jumps
= 1 << abs (processor_target_table
[ix86_cpu
].align_jump
);
951 if (align_functions
== 0)
952 align_functions
= 1 << abs (processor_target_table
[ix86_cpu
].align_func
);
954 /* Validate -mpreferred-stack-boundary= value, or provide default.
955 The default of 128 bits is for Pentium III's SSE __m128. */
956 ix86_preferred_stack_boundary
= 128;
957 if (ix86_preferred_stack_boundary_string
)
959 i
= atoi (ix86_preferred_stack_boundary_string
);
960 if (i
< (TARGET_64BIT
? 3 : 2) || i
> 31)
961 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i
,
962 TARGET_64BIT
? 3 : 2);
964 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
967 /* Validate -mbranch-cost= value, or provide default. */
968 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
969 if (ix86_branch_cost_string
)
971 i
= atoi (ix86_branch_cost_string
);
973 error ("-mbranch-cost=%d is not between 0 and 5", i
);
975 ix86_branch_cost
= i
;
978 /* Keep nonleaf frame pointers. */
979 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
980 flag_omit_frame_pointer
= 1;
982 /* If we're doing fast math, we don't care about comparison order
983 wrt NaNs. This lets us use a shorter comparison sequence. */
984 if (flag_unsafe_math_optimizations
)
985 target_flags
&= ~MASK_IEEE_FP
;
989 if (TARGET_ALIGN_DOUBLE
)
990 error ("-malign-double makes no sense in the 64bit mode.");
992 error ("-mrtd calling convention not supported in the 64bit mode.");
993 /* Enable by default the SSE and MMX builtins. */
994 target_flags
|= MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
;
997 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1000 target_flags
|= MASK_MMX
;
1002 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1005 target_flags
|= MASK_MMX
;
1006 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1007 extensions it adds. */
1008 if (x86_3dnow_a
& (1 << ix86_arch
))
1009 target_flags
|= MASK_3DNOW_A
;
1011 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1012 && !(target_flags
& MASK_NO_ACCUMULATE_OUTGOING_ARGS
)
1014 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1016 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1019 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1020 p
= strchr (internal_label_prefix
, 'X');
1021 internal_label_prefix_len
= p
- internal_label_prefix
;
1027 optimization_options (level
, size
)
1029 int size ATTRIBUTE_UNUSED
;
1031 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1032 make the problem with not enough registers even worse. */
1033 #ifdef INSN_SCHEDULING
1035 flag_schedule_insns
= 0;
1037 if (TARGET_64BIT
&& optimize
>= 1)
1038 flag_omit_frame_pointer
= 1;
1040 flag_pcc_struct_return
= 0;
1043 /* Table of valid machine attributes. */
1044 const struct attribute_spec ix86_attribute_table
[] =
1046 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1047 /* Stdcall attribute says callee is responsible for popping arguments
1048 if they are not variable. */
1049 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1050 /* Cdecl attribute says the callee is a normal C declaration */
1051 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1052 /* Regparm attribute specifies how many integer arguments are to be
1053 passed in registers. */
1054 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1055 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1056 { "dllimport", 1, 1, false, false, false, ix86_handle_dll_attribute
},
1057 { "dllexport", 1, 1, false, false, false, ix86_handle_dll_attribute
},
1058 { "shared", 1, 1, true, false, false, ix86_handle_shared_attribute
},
1060 { NULL
, 0, 0, false, false, false, NULL
}
1063 /* Handle a "cdecl" or "stdcall" attribute;
1064 arguments as in struct attribute_spec.handler. */
1066 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1069 tree args ATTRIBUTE_UNUSED
;
1070 int flags ATTRIBUTE_UNUSED
;
1073 if (TREE_CODE (*node
) != FUNCTION_TYPE
1074 && TREE_CODE (*node
) != METHOD_TYPE
1075 && TREE_CODE (*node
) != FIELD_DECL
1076 && TREE_CODE (*node
) != TYPE_DECL
)
1078 warning ("`%s' attribute only applies to functions",
1079 IDENTIFIER_POINTER (name
));
1080 *no_add_attrs
= true;
1085 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1086 *no_add_attrs
= true;
1092 /* Handle a "regparm" attribute;
1093 arguments as in struct attribute_spec.handler. */
1095 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1099 int flags ATTRIBUTE_UNUSED
;
1102 if (TREE_CODE (*node
) != FUNCTION_TYPE
1103 && TREE_CODE (*node
) != METHOD_TYPE
1104 && TREE_CODE (*node
) != FIELD_DECL
1105 && TREE_CODE (*node
) != TYPE_DECL
)
1107 warning ("`%s' attribute only applies to functions",
1108 IDENTIFIER_POINTER (name
));
1109 *no_add_attrs
= true;
1115 cst
= TREE_VALUE (args
);
1116 if (TREE_CODE (cst
) != INTEGER_CST
)
1118 warning ("`%s' attribute requires an integer constant argument",
1119 IDENTIFIER_POINTER (name
));
1120 *no_add_attrs
= true;
1122 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1124 warning ("argument to `%s' attribute larger than %d",
1125 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1126 *no_add_attrs
= true;
1133 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1135 /* Generate the assembly code for function entry. FILE is a stdio
1136 stream to output the code to. SIZE is an int: how many units of
1137 temporary storage to allocate.
1139 Refer to the array `regs_ever_live' to determine which registers to
1140 save; `regs_ever_live[I]' is nonzero if register number I is ever
1141 used in the function. This function is responsible for knowing
1142 which registers should not be saved even if used.
1144 We override it here to allow for the new profiling code to go before
1145 the prologue and the old mcount code to go after the prologue (and
1146 after %ebx has been set up for ELF shared library support). */
1149 ix86_osf_output_function_prologue (file
, size
)
1154 char *lprefix
= LPREFIX
;
1155 int labelno
= profile_label_no
;
1159 if (TARGET_UNDERSCORES
)
1162 if (profile_flag
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1164 if (!flag_pic
&& !HALF_PIC_P ())
1166 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1167 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1170 else if (HALF_PIC_P ())
1174 HALF_PIC_EXTERNAL ("_mcount_ptr");
1175 symref
= HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode
,
1178 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1179 fprintf (file
, "\tmovl %s%s,%%eax\n", prefix
,
1181 fprintf (file
, "\tcall *(%%eax)\n");
1186 static int call_no
= 0;
1188 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1189 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1190 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1191 lprefix
, call_no
++);
1192 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1194 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1196 fprintf (file
, "\tcall *(%%eax)\n");
1202 if (profile_flag
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1206 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1207 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1212 static int call_no
= 0;
1214 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1215 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1216 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1217 lprefix
, call_no
++);
1218 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1220 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1222 fprintf (file
, "\tcall *(%%eax)\n");
1225 #endif /* !OSF_OS */
1227 function_prologue (file
, size
);
1230 #endif /* OSF_OS || TARGET_OSF1ELF */
1232 /* Return 0 if the attributes for two types are incompatible, 1 if they
1233 are compatible, and 2 if they are nearly compatible (which causes a
1234 warning to be generated). */
1237 ix86_comp_type_attributes (type1
, type2
)
1241 /* Check for mismatch of non-default calling convention. */
1242 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1244 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1247 /* Check for mismatched return types (cdecl vs stdcall). */
1248 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1249 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1254 /* Value is the number of bytes of arguments automatically
1255 popped when returning from a subroutine call.
1256 FUNDECL is the declaration node of the function (as a tree),
1257 FUNTYPE is the data type of the function (as a tree),
1258 or for a library call it is an identifier node for the subroutine name.
1259 SIZE is the number of bytes of arguments passed on the stack.
1261 On the 80386, the RTD insn may be used to pop them if the number
1262 of args is fixed, but if the number is variable then the caller
1263 must pop them all. RTD can't be used for library calls now
1264 because the library is compiled with the Unix compiler.
1265 Use of RTD is a selectable option, since it is incompatible with
1266 standard Unix calling sequences. If the option is not selected,
1267 the caller must always pop the args.
1269 The attribute stdcall is equivalent to RTD on a per module basis. */
1272 ix86_return_pops_args (fundecl
, funtype
, size
)
1277 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1279 /* Cdecl functions override -mrtd, and never pop the stack. */
1280 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1282 /* Stdcall functions will pop the stack if not variable args. */
1283 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1287 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1288 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1289 == void_type_node
)))
1293 /* Lose any fake structure return argument. */
1294 if (aggregate_value_p (TREE_TYPE (funtype
))
1296 return GET_MODE_SIZE (Pmode
);
1301 /* Argument support functions. */
1303 /* Return true when register may be used to pass function parameters. */
1305 ix86_function_arg_regno_p (regno
)
1310 return regno
< REGPARM_MAX
|| (TARGET_SSE
&& SSE_REGNO_P (regno
));
1311 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1313 /* RAX is used as hidden argument to va_arg functions. */
1316 for (i
= 0; i
< REGPARM_MAX
; i
++)
1317 if (regno
== x86_64_int_parameter_registers
[i
])
1322 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1323 for a call to a function whose data type is FNTYPE.
1324 For a library call, FNTYPE is 0. */
1327 init_cumulative_args (cum
, fntype
, libname
)
1328 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1329 tree fntype
; /* tree ptr for function decl */
1330 rtx libname
; /* SYMBOL_REF of library name or 0 */
1332 static CUMULATIVE_ARGS zero_cum
;
1333 tree param
, next_param
;
1335 if (TARGET_DEBUG_ARG
)
1337 fprintf (stderr
, "\ninit_cumulative_args (");
1339 fprintf (stderr
, "fntype code = %s, ret code = %s",
1340 tree_code_name
[(int) TREE_CODE (fntype
)],
1341 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1343 fprintf (stderr
, "no fntype");
1346 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1351 /* Set up the number of registers to use for passing arguments. */
1352 cum
->nregs
= ix86_regparm
;
1353 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1354 if (fntype
&& !TARGET_64BIT
)
1356 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1359 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1361 cum
->maybe_vaarg
= false;
1363 /* Determine if this function has variable arguments. This is
1364 indicated by the last argument being 'void_type_mode' if there
1365 are no variable arguments. If there are variable arguments, then
1366 we won't pass anything in registers */
1370 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1371 param
!= 0; param
= next_param
)
1373 next_param
= TREE_CHAIN (param
);
1374 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1378 cum
->maybe_vaarg
= true;
1382 if ((!fntype
&& !libname
)
1383 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1384 cum
->maybe_vaarg
= 1;
1386 if (TARGET_DEBUG_ARG
)
1387 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1392 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1393 of this code is to classify each 8 bytes of incoming argument by the register
1394 class and assign registers accordingly. */
1396 /* Return the union class of CLASS1 and CLASS2.
1397 See the x86-64 PS ABI for details. */
1399 static enum x86_64_reg_class
1400 merge_classes (class1
, class2
)
1401 enum x86_64_reg_class class1
, class2
;
1403 /* Rule #1: If both classes are equal, this is the resulting class. */
1404 if (class1
== class2
)
1407 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1409 if (class1
== X86_64_NO_CLASS
)
1411 if (class2
== X86_64_NO_CLASS
)
1414 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1415 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1416 return X86_64_MEMORY_CLASS
;
1418 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1419 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1420 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1421 return X86_64_INTEGERSI_CLASS
;
1422 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1423 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1424 return X86_64_INTEGER_CLASS
;
1426 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1427 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1428 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1429 return X86_64_MEMORY_CLASS
;
1431 /* Rule #6: Otherwise class SSE is used. */
1432 return X86_64_SSE_CLASS
;
1435 /* Classify the argument of type TYPE and mode MODE.
1436 CLASSES will be filled by the register class used to pass each word
1437 of the operand. The number of words is returned. In case the parameter
1438 should be passed in memory, 0 is returned. As a special case for zero
1439 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1441 BIT_OFFSET is used internally for handling records and specifies offset
1442 of the offset in bits modulo 256 to avoid overflow cases.
1444 See the x86-64 PS ABI for details.
1448 classify_argument (mode
, type
, classes
, bit_offset
)
1449 enum machine_mode mode
;
1451 enum x86_64_reg_class classes
[MAX_CLASSES
];
1455 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1456 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1458 if (type
&& AGGREGATE_TYPE_P (type
))
1462 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1464 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1468 for (i
= 0; i
< words
; i
++)
1469 classes
[i
] = X86_64_NO_CLASS
;
1471 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1472 signalize memory class, so handle it as special case. */
1475 classes
[0] = X86_64_NO_CLASS
;
1479 /* Classify each field of record and merge classes. */
1480 if (TREE_CODE (type
) == RECORD_TYPE
)
1482 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1484 if (TREE_CODE (field
) == FIELD_DECL
)
1488 /* Bitfields are always classified as integer. Handle them
1489 early, since later code would consider them to be
1490 misaligned integers. */
1491 if (DECL_BIT_FIELD (field
))
1493 for (i
= int_bit_position (field
) / 8 / 8;
1494 i
< (int_bit_position (field
)
1495 + tree_low_cst (DECL_SIZE (field
), 0)
1498 merge_classes (X86_64_INTEGER_CLASS
,
1503 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1504 TREE_TYPE (field
), subclasses
,
1505 (int_bit_position (field
)
1506 + bit_offset
) % 256);
1509 for (i
= 0; i
< num
; i
++)
1512 (int_bit_position (field
) + bit_offset
) / 8 / 8;
1514 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1520 /* Arrays are handled as small records. */
1521 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1524 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1525 TREE_TYPE (type
), subclasses
, bit_offset
);
1529 /* The partial classes are now full classes. */
1530 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1531 subclasses
[0] = X86_64_SSE_CLASS
;
1532 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1533 subclasses
[0] = X86_64_INTEGER_CLASS
;
1535 for (i
= 0; i
< words
; i
++)
1536 classes
[i
] = subclasses
[i
% num
];
1538 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1539 else if (TREE_CODE (type
) == UNION_TYPE
)
1541 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1543 if (TREE_CODE (field
) == FIELD_DECL
)
1546 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1547 TREE_TYPE (field
), subclasses
,
1551 for (i
= 0; i
< num
; i
++)
1552 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1559 /* Final merger cleanup. */
1560 for (i
= 0; i
< words
; i
++)
1562 /* If one class is MEMORY, everything should be passed in
1564 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1567 /* The X86_64_SSEUP_CLASS should always be preceded by
1568 X86_64_SSE_CLASS. */
1569 if (classes
[i
] == X86_64_SSEUP_CLASS
1570 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1571 classes
[i
] = X86_64_SSE_CLASS
;
1573 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1574 if (classes
[i
] == X86_64_X87UP_CLASS
1575 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1576 classes
[i
] = X86_64_SSE_CLASS
;
1581 /* Compute alignment needed. We align all types to natural boundaries with
1582 exception of XFmode that is aligned to 64bits. */
1583 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1585 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1588 mode_alignment
= 128;
1589 else if (mode
== XCmode
)
1590 mode_alignment
= 256;
1591 /* Misaligned fields are always returned in memory. */
1592 if (bit_offset
% mode_alignment
)
1596 /* Classification of atomic types. */
1606 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1607 classes
[0] = X86_64_INTEGERSI_CLASS
;
1609 classes
[0] = X86_64_INTEGER_CLASS
;
1613 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1616 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1617 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1620 if (!(bit_offset
% 64))
1621 classes
[0] = X86_64_SSESF_CLASS
;
1623 classes
[0] = X86_64_SSE_CLASS
;
1626 classes
[0] = X86_64_SSEDF_CLASS
;
1629 classes
[0] = X86_64_X87_CLASS
;
1630 classes
[1] = X86_64_X87UP_CLASS
;
1633 classes
[0] = X86_64_X87_CLASS
;
1634 classes
[1] = X86_64_X87UP_CLASS
;
1635 classes
[2] = X86_64_X87_CLASS
;
1636 classes
[3] = X86_64_X87UP_CLASS
;
1639 classes
[0] = X86_64_SSEDF_CLASS
;
1640 classes
[1] = X86_64_SSEDF_CLASS
;
1643 classes
[0] = X86_64_SSE_CLASS
;
1652 /* Examine the argument and return the number of registers required in each
1653 class. Return 0 iff the parameter should be passed in memory. */
1655 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1656 enum machine_mode mode
;
1658 int *int_nregs
, *sse_nregs
;
1661 enum x86_64_reg_class
class[MAX_CLASSES
];
1662 int n
= classify_argument (mode
, type
, class, 0);
1668 for (n
--; n
>= 0; n
--)
1671 case X86_64_INTEGER_CLASS
:
1672 case X86_64_INTEGERSI_CLASS
:
1675 case X86_64_SSE_CLASS
:
1676 case X86_64_SSESF_CLASS
:
1677 case X86_64_SSEDF_CLASS
:
1680 case X86_64_NO_CLASS
:
1681 case X86_64_SSEUP_CLASS
:
1683 case X86_64_X87_CLASS
:
1684 case X86_64_X87UP_CLASS
:
1688 case X86_64_MEMORY_CLASS
:
1693 /* Construct container for the argument used by GCC interface. See
1694 FUNCTION_ARG for the detailed description. */
1696 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
1697 enum machine_mode mode
;
1700 int nintregs
, nsseregs
;
1701 int *intreg
, sse_regno
;
1703 enum machine_mode tmpmode
;
1705 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1706 enum x86_64_reg_class
class[MAX_CLASSES
];
1710 int needed_sseregs
, needed_intregs
;
1711 rtx exp
[MAX_CLASSES
];
1714 n
= classify_argument (mode
, type
, class, 0);
1715 if (TARGET_DEBUG_ARG
)
1718 fprintf (stderr
, "Memory class\n");
1721 fprintf (stderr
, "Classes:");
1722 for (i
= 0; i
< n
; i
++)
1724 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
1726 fprintf (stderr
, "\n");
1731 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
1733 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
1736 /* First construct simple cases. Avoid SCmode, since we want to use
1737 single register to pass this type. */
1738 if (n
== 1 && mode
!= SCmode
)
1741 case X86_64_INTEGER_CLASS
:
1742 case X86_64_INTEGERSI_CLASS
:
1743 return gen_rtx_REG (mode
, intreg
[0]);
1744 case X86_64_SSE_CLASS
:
1745 case X86_64_SSESF_CLASS
:
1746 case X86_64_SSEDF_CLASS
:
1747 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1748 case X86_64_X87_CLASS
:
1749 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
1750 case X86_64_NO_CLASS
:
1751 /* Zero sized array, struct or class. */
1756 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
1757 return gen_rtx_REG (TImode
, SSE_REGNO (sse_regno
));
1759 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
1760 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
1761 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
1762 && class[1] == X86_64_INTEGER_CLASS
1763 && (mode
== CDImode
|| mode
== TImode
)
1764 && intreg
[0] + 1 == intreg
[1])
1765 return gen_rtx_REG (mode
, intreg
[0]);
1767 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
1768 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
1769 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
1771 /* Otherwise figure out the entries of the PARALLEL. */
1772 for (i
= 0; i
< n
; i
++)
1776 case X86_64_NO_CLASS
:
1778 case X86_64_INTEGER_CLASS
:
1779 case X86_64_INTEGERSI_CLASS
:
1780 /* Merge TImodes on aligned occasions here too. */
1781 if (i
* 8 + 8 > bytes
)
1782 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
1783 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
1787 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1788 if (tmpmode
== BLKmode
)
1790 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1791 gen_rtx_REG (tmpmode
, *intreg
),
1795 case X86_64_SSESF_CLASS
:
1796 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1797 gen_rtx_REG (SFmode
,
1798 SSE_REGNO (sse_regno
)),
1802 case X86_64_SSEDF_CLASS
:
1803 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1804 gen_rtx_REG (DFmode
,
1805 SSE_REGNO (sse_regno
)),
1809 case X86_64_SSE_CLASS
:
1810 if (i
< n
&& class[i
+ 1] == X86_64_SSEUP_CLASS
)
1811 tmpmode
= TImode
, i
++;
1814 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1815 gen_rtx_REG (tmpmode
,
1816 SSE_REGNO (sse_regno
)),
1824 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
1825 for (i
= 0; i
< nexps
; i
++)
1826 XVECEXP (ret
, 0, i
) = exp
[i
];
1830 /* Update the data in CUM to advance over an argument
1831 of mode MODE and data type TYPE.
1832 (TYPE is null for libcalls where that information may not be available.) */
1835 function_arg_advance (cum
, mode
, type
, named
)
1836 CUMULATIVE_ARGS
*cum
; /* current arg information */
1837 enum machine_mode mode
; /* current arg mode */
1838 tree type
; /* type of the argument or 0 if lib support */
1839 int named
; /* whether or not the argument was named */
1842 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1843 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1845 if (TARGET_DEBUG_ARG
)
1847 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1848 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
1851 int int_nregs
, sse_nregs
;
1852 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
1853 cum
->words
+= words
;
1854 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
1856 cum
->nregs
-= int_nregs
;
1857 cum
->sse_nregs
-= sse_nregs
;
1858 cum
->regno
+= int_nregs
;
1859 cum
->sse_regno
+= sse_nregs
;
1862 cum
->words
+= words
;
1866 if (TARGET_SSE
&& mode
== TImode
)
1868 cum
->sse_words
+= words
;
1869 cum
->sse_nregs
-= 1;
1870 cum
->sse_regno
+= 1;
1871 if (cum
->sse_nregs
<= 0)
1879 cum
->words
+= words
;
1880 cum
->nregs
-= words
;
1881 cum
->regno
+= words
;
1883 if (cum
->nregs
<= 0)
1893 /* Define where to put the arguments to a function.
1894 Value is zero to push the argument on the stack,
1895 or a hard register in which to store the argument.
1897 MODE is the argument's machine mode.
1898 TYPE is the data type of the argument (as a tree).
1899 This is null for libcalls where that information may
1901 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1902 the preceding args and about the function being called.
1903 NAMED is nonzero if this argument is a named parameter
1904 (otherwise it is an extra parameter matching an ellipsis). */
1907 function_arg (cum
, mode
, type
, named
)
1908 CUMULATIVE_ARGS
*cum
; /* current arg information */
1909 enum machine_mode mode
; /* current arg mode */
1910 tree type
; /* type of the argument or 0 if lib support */
1911 int named
; /* != 0 for normal args, == 0 for ... args */
1915 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1916 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1918 /* Handle a hidden AL argument containing number of registers for varargs
1919 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
1921 if (mode
== VOIDmode
)
1924 return GEN_INT (cum
->maybe_vaarg
1925 ? (cum
->sse_nregs
< 0
1933 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
1934 &x86_64_int_parameter_registers
[cum
->regno
],
1939 /* For now, pass fp/complex values on the stack. */
1948 if (words
<= cum
->nregs
)
1949 ret
= gen_rtx_REG (mode
, cum
->regno
);
1953 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
1957 if (TARGET_DEBUG_ARG
)
1960 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1961 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
1964 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO(ret
) ]);
1966 fprintf (stderr
, ", stack");
1968 fprintf (stderr
, " )\n");
1974 /* Gives the alignment boundary, in bits, of an argument with the specified mode
1978 ix86_function_arg_boundary (mode
, type
)
1979 enum machine_mode mode
;
1984 return PARM_BOUNDARY
;
1986 align
= TYPE_ALIGN (type
);
1988 align
= GET_MODE_ALIGNMENT (mode
);
1989 if (align
< PARM_BOUNDARY
)
1990 align
= PARM_BOUNDARY
;
1996 /* Return true if N is a possible register number of function value. */
1998 ix86_function_value_regno_p (regno
)
2003 return ((regno
) == 0
2004 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2005 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2007 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2008 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2009 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2012 /* Define how to find the value returned by a function.
2013 VALTYPE is the data type of the value (as a tree).
2014 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2015 otherwise, FUNC is 0. */
2017 ix86_function_value (valtype
)
2022 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2023 REGPARM_MAX
, SSE_REGPARM_MAX
,
2024 x86_64_int_return_registers
, 0);
2025 /* For zero sized structures, construct_container returns NULL, but we need
2026 to keep the rest of the compiler happy by returning a meaningful value. */
2028 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2032 return gen_rtx_REG (TYPE_MODE (valtype
), VALUE_REGNO (TYPE_MODE (valtype
)));
2035 /* Return false iff type is returned in memory. */
2037 ix86_return_in_memory (type
)
2040 int needed_intregs
, needed_sseregs
;
2043 return !examine_argument (TYPE_MODE (type
), type
, 1,
2044 &needed_intregs
, &needed_sseregs
);
2048 if (TYPE_MODE (type
) == BLKmode
2049 || (VECTOR_MODE_P (TYPE_MODE (type
))
2050 && int_size_in_bytes (type
) == 8)
2051 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2052 && TYPE_MODE (type
) != TFmode
2053 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2059 /* Define how to find the value returned by a library function
2060 assuming the value has mode MODE. */
2062 ix86_libcall_value (mode
)
2063 enum machine_mode mode
;
2073 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2076 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2078 return gen_rtx_REG (mode
, 0);
2082 return gen_rtx_REG (mode
, VALUE_REGNO (mode
));
2085 /* Create the va_list data type. */
2088 ix86_build_va_list ()
2090 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2092 /* For i386 we use plain pointer to argument area. */
2094 return build_pointer_type (char_type_node
);
2096 record
= make_lang_type (RECORD_TYPE
);
2097 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2099 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2100 unsigned_type_node
);
2101 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2102 unsigned_type_node
);
2103 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2105 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2108 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2109 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2110 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2111 DECL_FIELD_CONTEXT (f_sav
) = record
;
2113 TREE_CHAIN (record
) = type_decl
;
2114 TYPE_NAME (record
) = type_decl
;
2115 TYPE_FIELDS (record
) = f_gpr
;
2116 TREE_CHAIN (f_gpr
) = f_fpr
;
2117 TREE_CHAIN (f_fpr
) = f_ovf
;
2118 TREE_CHAIN (f_ovf
) = f_sav
;
2120 layout_type (record
);
2122 /* The correct type is an array type of one element. */
2123 return build_array_type (record
, build_index_type (size_zero_node
));
2126 /* Perform any needed actions needed for a function that is receiving a
2127 variable number of arguments.
2131 MODE and TYPE are the mode and type of the current parameter.
2133 PRETEND_SIZE is a variable that should be set to the amount of stack
2134 that must be pushed by the prolog to pretend that our caller pushed
2137 Normally, this macro will push all remaining incoming registers on the
2138 stack and set PRETEND_SIZE to the length of the registers pushed. */
2141 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2142 CUMULATIVE_ARGS
*cum
;
2143 enum machine_mode mode
;
2145 int *pretend_size ATTRIBUTE_UNUSED
;
2149 CUMULATIVE_ARGS next_cum
;
2150 rtx save_area
= NULL_RTX
, mem
;
2163 /* Indicate to allocate space on the stack for varargs save area. */
2164 ix86_save_varrargs_registers
= 1;
2166 fntype
= TREE_TYPE (current_function_decl
);
2167 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2168 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2169 != void_type_node
));
2171 /* For varargs, we do not want to skip the dummy va_dcl argument.
2172 For stdargs, we do want to skip the last named argument. */
2175 function_arg_advance (&next_cum
, mode
, type
, 1);
2178 save_area
= frame_pointer_rtx
;
2180 set
= get_varargs_alias_set ();
2182 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2184 mem
= gen_rtx_MEM (Pmode
,
2185 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2186 set_mem_alias_set (mem
, set
);
2187 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2188 x86_64_int_parameter_registers
[i
]));
2191 if (next_cum
.sse_nregs
)
2193 /* Now emit code to save SSE registers. The AX parameter contains number
2194 of SSE parameter regsiters used to call this function. We use
2195 sse_prologue_save insn template that produces computed jump across
2196 SSE saves. We need some preparation work to get this working. */
2198 label
= gen_label_rtx ();
2199 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2201 /* Compute address to jump to :
2202 label - 5*eax + nnamed_sse_arguments*5 */
2203 tmp_reg
= gen_reg_rtx (Pmode
);
2204 nsse_reg
= gen_reg_rtx (Pmode
);
2205 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2206 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2207 gen_rtx_MULT (VOIDmode
, nsse_reg
,
2209 if (next_cum
.sse_regno
)
2212 gen_rtx_CONST (DImode
,
2213 gen_rtx_PLUS (DImode
,
2215 GEN_INT (next_cum
.sse_regno
* 4))));
2217 emit_move_insn (nsse_reg
, label_ref
);
2218 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2220 /* Compute address of memory block we save into. We always use pointer
2221 pointing 127 bytes after first byte to store - this is needed to keep
2222 instruction size limited by 4 bytes. */
2223 tmp_reg
= gen_reg_rtx (Pmode
);
2224 emit_insn (gen_rtx_SET(VOIDmode
, tmp_reg
,
2225 plus_constant (save_area
, 8 * REGPARM_MAX
+ 127)));
2226 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2227 set_mem_alias_set (mem
, set
);
2229 /* And finally do the dirty job! */
2230 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
, GEN_INT (next_cum
.sse_regno
),
2236 /* Implement va_start. */
2239 ix86_va_start (stdarg_p
, valist
, nextarg
)
2244 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2245 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2246 tree gpr
, fpr
, ovf
, sav
, t
;
2248 /* Only 64bit target needs something special. */
2251 std_expand_builtin_va_start (stdarg_p
, valist
, nextarg
);
2255 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2256 f_fpr
= TREE_CHAIN (f_gpr
);
2257 f_ovf
= TREE_CHAIN (f_fpr
);
2258 f_sav
= TREE_CHAIN (f_ovf
);
2260 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2261 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2262 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2263 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2264 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2266 /* Count number of gp and fp argument registers used. */
2267 words
= current_function_args_info
.words
;
2268 n_gpr
= current_function_args_info
.regno
;
2269 n_fpr
= current_function_args_info
.sse_regno
;
2271 if (TARGET_DEBUG_ARG
)
2272 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2273 (int)words
, (int)n_gpr
, (int)n_fpr
);
2275 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2276 build_int_2 (n_gpr
* 8, 0));
2277 TREE_SIDE_EFFECTS (t
) = 1;
2278 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2280 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2281 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2282 TREE_SIDE_EFFECTS (t
) = 1;
2283 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2285 /* Find the overflow area. */
2286 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2288 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2289 build_int_2 (words
* UNITS_PER_WORD
, 0));
2290 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2291 TREE_SIDE_EFFECTS (t
) = 1;
2292 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2294 /* Find the register save area.
2295 Prologue of the function save it right above stack frame. */
2296 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2297 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2298 TREE_SIDE_EFFECTS (t
) = 1;
2299 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2302 /* Implement va_arg. */
2304 ix86_va_arg (valist
, type
)
2307 static int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2308 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2309 tree gpr
, fpr
, ovf
, sav
, t
;
2310 int indirect_p
= 0, size
, rsize
;
2311 rtx lab_false
, lab_over
= NULL_RTX
;
2315 /* Only 64bit target needs something special. */
2318 return std_expand_builtin_va_arg (valist
, type
);
2321 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2322 f_fpr
= TREE_CHAIN (f_gpr
);
2323 f_ovf
= TREE_CHAIN (f_fpr
);
2324 f_sav
= TREE_CHAIN (f_ovf
);
2326 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2327 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2328 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2329 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2330 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2332 size
= int_size_in_bytes (type
);
2333 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2335 container
= construct_container (TYPE_MODE (type
), type
, 0,
2336 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2338 * Pull the value out of the saved registers ...
2341 addr_rtx
= gen_reg_rtx (Pmode
);
2345 rtx int_addr_rtx
, sse_addr_rtx
;
2346 int needed_intregs
, needed_sseregs
;
2349 lab_over
= gen_label_rtx ();
2350 lab_false
= gen_label_rtx ();
2352 examine_argument (TYPE_MODE (type
), type
, 0,
2353 &needed_intregs
, &needed_sseregs
);
2356 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2357 || TYPE_ALIGN (type
) > 128);
2359 /* In case we are passing structure, verify that it is consetuctive block
2360 on the register save area. If not we need to do moves. */
2361 if (!need_temp
&& !REG_P (container
))
2363 /* Verify that all registers are strictly consetuctive */
2364 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2368 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2370 rtx slot
= XVECEXP (container
, 0, i
);
2371 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int)i
2372 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2380 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2382 rtx slot
= XVECEXP (container
, 0, i
);
2383 if (REGNO (XEXP (slot
, 0)) != (unsigned int)i
2384 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2391 int_addr_rtx
= addr_rtx
;
2392 sse_addr_rtx
= addr_rtx
;
2396 int_addr_rtx
= gen_reg_rtx (Pmode
);
2397 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2399 /* First ensure that we fit completely in registers. */
2402 emit_cmp_and_jump_insns (expand_expr
2403 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2404 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2405 1) * 8), GE
, const1_rtx
, SImode
,
2410 emit_cmp_and_jump_insns (expand_expr
2411 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2412 GEN_INT ((SSE_REGPARM_MAX
-
2413 needed_sseregs
+ 1) * 16 +
2414 REGPARM_MAX
* 8), GE
, const1_rtx
,
2415 SImode
, 1, 1, lab_false
);
2418 /* Compute index to start of area used for integer regs. */
2421 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2422 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2423 if (r
!= int_addr_rtx
)
2424 emit_move_insn (int_addr_rtx
, r
);
2428 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2429 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2430 if (r
!= sse_addr_rtx
)
2431 emit_move_insn (sse_addr_rtx
, r
);
2438 mem
= assign_temp (type
, 0, 1, 0);
2439 set_mem_alias_set (mem
, get_varargs_alias_set ());
2440 addr_rtx
= XEXP (mem
, 0);
2441 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2443 rtx slot
= XVECEXP (container
, 0, i
);
2444 rtx reg
= XEXP (slot
, 0);
2445 enum machine_mode mode
= GET_MODE (reg
);
2451 if (SSE_REGNO_P (REGNO (reg
)))
2453 src_addr
= sse_addr_rtx
;
2454 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2458 src_addr
= int_addr_rtx
;
2459 src_offset
= REGNO (reg
) * 8;
2461 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2462 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2463 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2464 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2465 PUT_MODE (dest_mem
, mode
);
2466 /* ??? Break out TImode moves from integer registers? */
2467 emit_move_insn (dest_mem
, src_mem
);
2474 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2475 build_int_2 (needed_intregs
* 8, 0));
2476 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2477 TREE_SIDE_EFFECTS (t
) = 1;
2478 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2483 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2484 build_int_2 (needed_sseregs
* 16, 0));
2485 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2486 TREE_SIDE_EFFECTS (t
) = 1;
2487 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2490 emit_jump_insn (gen_jump (lab_over
));
2492 emit_label (lab_false
);
2495 /* ... otherwise out of the overflow area. */
2497 /* Care for on-stack alignment if needed. */
2498 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2502 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2503 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2504 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2508 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2510 emit_move_insn (addr_rtx
, r
);
2513 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2514 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2515 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2516 TREE_SIDE_EFFECTS (t
) = 1;
2517 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2520 emit_label (lab_over
);
2525 r
= gen_rtx_MEM (Pmode
, addr_rtx
);
2526 set_mem_alias_set (r
, get_varargs_alias_set ());
2527 emit_move_insn (addr_rtx
, r
);
2533 /* Return nonzero if OP is general operand representable on x86_64. */
2536 x86_64_general_operand (op
, mode
)
2538 enum machine_mode mode
;
2541 return general_operand (op
, mode
);
2542 if (nonimmediate_operand (op
, mode
))
2544 return x86_64_sign_extended_value (op
);
2547 /* Return nonzero if OP is general operand representable on x86_64
2548 as eighter sign extended or zero extended constant. */
2551 x86_64_szext_general_operand (op
, mode
)
2553 enum machine_mode mode
;
2556 return general_operand (op
, mode
);
2557 if (nonimmediate_operand (op
, mode
))
2559 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2562 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2565 x86_64_nonmemory_operand (op
, mode
)
2567 enum machine_mode mode
;
2570 return nonmemory_operand (op
, mode
);
2571 if (register_operand (op
, mode
))
2573 return x86_64_sign_extended_value (op
);
2576 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2579 x86_64_movabs_operand (op
, mode
)
2581 enum machine_mode mode
;
2583 if (!TARGET_64BIT
|| !flag_pic
)
2584 return nonmemory_operand (op
, mode
);
2585 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2587 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2592 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2595 x86_64_szext_nonmemory_operand (op
, mode
)
2597 enum machine_mode mode
;
2600 return nonmemory_operand (op
, mode
);
2601 if (register_operand (op
, mode
))
2603 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2606 /* Return nonzero if OP is immediate operand representable on x86_64. */
2609 x86_64_immediate_operand (op
, mode
)
2611 enum machine_mode mode
;
2614 return immediate_operand (op
, mode
);
2615 return x86_64_sign_extended_value (op
);
2618 /* Return nonzero if OP is immediate operand representable on x86_64. */
2621 x86_64_zext_immediate_operand (op
, mode
)
2623 enum machine_mode mode ATTRIBUTE_UNUSED
;
2625 return x86_64_zero_extended_value (op
);
2628 /* Return nonzero if OP is (const_int 1), else return zero. */
2631 const_int_1_operand (op
, mode
)
2633 enum machine_mode mode ATTRIBUTE_UNUSED
;
2635 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2638 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2639 reference and a constant. */
2642 symbolic_operand (op
, mode
)
2644 enum machine_mode mode ATTRIBUTE_UNUSED
;
2646 switch (GET_CODE (op
))
2654 if (GET_CODE (op
) == SYMBOL_REF
2655 || GET_CODE (op
) == LABEL_REF
2656 || (GET_CODE (op
) == UNSPEC
2657 && (XINT (op
, 1) == 6
2658 || XINT (op
, 1) == 7
2659 || XINT (op
, 1) == 15)))
2661 if (GET_CODE (op
) != PLUS
2662 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2666 if (GET_CODE (op
) == SYMBOL_REF
2667 || GET_CODE (op
) == LABEL_REF
)
2669 /* Only @GOTOFF gets offsets. */
2670 if (GET_CODE (op
) != UNSPEC
2671 || XINT (op
, 1) != 7)
2674 op
= XVECEXP (op
, 0, 0);
2675 if (GET_CODE (op
) == SYMBOL_REF
2676 || GET_CODE (op
) == LABEL_REF
)
2685 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2688 pic_symbolic_operand (op
, mode
)
2690 enum machine_mode mode ATTRIBUTE_UNUSED
;
2692 if (GET_CODE (op
) != CONST
)
2697 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
2702 if (GET_CODE (op
) == UNSPEC
)
2704 if (GET_CODE (op
) != PLUS
2705 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2708 if (GET_CODE (op
) == UNSPEC
)
2714 /* Return true if OP is a symbolic operand that resolves locally. */
2717 local_symbolic_operand (op
, mode
)
2719 enum machine_mode mode ATTRIBUTE_UNUSED
;
2721 if (GET_CODE (op
) == LABEL_REF
)
2724 if (GET_CODE (op
) == CONST
2725 && GET_CODE (XEXP (op
, 0)) == PLUS
2726 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2727 op
= XEXP (XEXP (op
, 0), 0);
2729 if (GET_CODE (op
) != SYMBOL_REF
)
2732 /* These we've been told are local by varasm and encode_section_info
2734 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
2737 /* There is, however, a not insubstantial body of code in the rest of
2738 the compiler that assumes it can just stick the results of
2739 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2740 /* ??? This is a hack. Should update the body of the compiler to
2741 always create a DECL an invoke ENCODE_SECTION_INFO. */
2742 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
2743 internal_label_prefix_len
) == 0)
2749 /* Test for a valid operand for a call instruction. Don't allow the
2750 arg pointer register or virtual regs since they may decay into
2751 reg + const, which the patterns can't handle. */
2754 call_insn_operand (op
, mode
)
2756 enum machine_mode mode ATTRIBUTE_UNUSED
;
2758 /* Disallow indirect through a virtual register. This leads to
2759 compiler aborts when trying to eliminate them. */
2760 if (GET_CODE (op
) == REG
2761 && (op
== arg_pointer_rtx
2762 || op
== frame_pointer_rtx
2763 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
2764 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
2767 /* Disallow `call 1234'. Due to varying assembler lameness this
2768 gets either rejected or translated to `call .+1234'. */
2769 if (GET_CODE (op
) == CONST_INT
)
2772 /* Explicitly allow SYMBOL_REF even if pic. */
2773 if (GET_CODE (op
) == SYMBOL_REF
)
2776 /* Half-pic doesn't allow anything but registers and constants.
2777 We've just taken care of the later. */
2779 return register_operand (op
, Pmode
);
2781 /* Otherwise we can allow any general_operand in the address. */
2782 return general_operand (op
, Pmode
);
2786 constant_call_address_operand (op
, mode
)
2788 enum machine_mode mode ATTRIBUTE_UNUSED
;
2790 if (GET_CODE (op
) == CONST
2791 && GET_CODE (XEXP (op
, 0)) == PLUS
2792 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2793 op
= XEXP (XEXP (op
, 0), 0);
2794 return GET_CODE (op
) == SYMBOL_REF
;
2797 /* Match exactly zero and one. */
2800 const0_operand (op
, mode
)
2802 enum machine_mode mode
;
2804 return op
== CONST0_RTX (mode
);
2808 const1_operand (op
, mode
)
2810 enum machine_mode mode ATTRIBUTE_UNUSED
;
2812 return op
== const1_rtx
;
2815 /* Match 2, 4, or 8. Used for leal multiplicands. */
2818 const248_operand (op
, mode
)
2820 enum machine_mode mode ATTRIBUTE_UNUSED
;
2822 return (GET_CODE (op
) == CONST_INT
2823 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
2826 /* True if this is a constant appropriate for an increment or decremenmt. */
2829 incdec_operand (op
, mode
)
2831 enum machine_mode mode ATTRIBUTE_UNUSED
;
2833 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
2834 registers, since carry flag is not set. */
2835 if (TARGET_PENTIUM4
&& !optimize_size
)
2837 return op
== const1_rtx
|| op
== constm1_rtx
;
2840 /* Return nonzero if OP is acceptable as operand of DImode shift
2844 shiftdi_operand (op
, mode
)
2846 enum machine_mode mode ATTRIBUTE_UNUSED
;
2849 return nonimmediate_operand (op
, mode
);
2851 return register_operand (op
, mode
);
2854 /* Return false if this is the stack pointer, or any other fake
2855 register eliminable to the stack pointer. Otherwise, this is
2858 This is used to prevent esp from being used as an index reg.
2859 Which would only happen in pathological cases. */
2862 reg_no_sp_operand (op
, mode
)
2864 enum machine_mode mode
;
2867 if (GET_CODE (t
) == SUBREG
)
2869 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
2872 return register_operand (op
, mode
);
2876 mmx_reg_operand (op
, mode
)
2878 enum machine_mode mode ATTRIBUTE_UNUSED
;
2880 return MMX_REG_P (op
);
2883 /* Return false if this is any eliminable register. Otherwise
2887 general_no_elim_operand (op
, mode
)
2889 enum machine_mode mode
;
2892 if (GET_CODE (t
) == SUBREG
)
2894 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
2895 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
2896 || t
== virtual_stack_dynamic_rtx
)
2899 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
2900 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
2903 return general_operand (op
, mode
);
2906 /* Return false if this is any eliminable register. Otherwise
2907 register_operand or const_int. */
2910 nonmemory_no_elim_operand (op
, mode
)
2912 enum machine_mode mode
;
2915 if (GET_CODE (t
) == SUBREG
)
2917 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
2918 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
2919 || t
== virtual_stack_dynamic_rtx
)
2922 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
2925 /* Return true if op is a Q_REGS class register. */
2928 q_regs_operand (op
, mode
)
2930 enum machine_mode mode
;
2932 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
2934 if (GET_CODE (op
) == SUBREG
)
2935 op
= SUBREG_REG (op
);
2936 return QI_REG_P (op
);
2939 /* Return true if op is a NON_Q_REGS class register. */
2942 non_q_regs_operand (op
, mode
)
2944 enum machine_mode mode
;
2946 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
2948 if (GET_CODE (op
) == SUBREG
)
2949 op
= SUBREG_REG (op
);
2950 return NON_QI_REG_P (op
);
2953 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2956 sse_comparison_operator (op
, mode
)
2958 enum machine_mode mode ATTRIBUTE_UNUSED
;
2960 enum rtx_code code
= GET_CODE (op
);
2963 /* Operations supported directly. */
2973 /* These are equivalent to ones above in non-IEEE comparisons. */
2980 return !TARGET_IEEE_FP
;
2985 /* Return 1 if OP is a valid comparison operator in valid mode. */
2987 ix86_comparison_operator (op
, mode
)
2989 enum machine_mode mode
;
2991 enum machine_mode inmode
;
2992 enum rtx_code code
= GET_CODE (op
);
2993 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
2995 if (GET_RTX_CLASS (code
) != '<')
2997 inmode
= GET_MODE (XEXP (op
, 0));
2999 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3001 enum rtx_code second_code
, bypass_code
;
3002 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3003 return (bypass_code
== NIL
&& second_code
== NIL
);
3010 if (inmode
== CCmode
|| inmode
== CCGCmode
3011 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3014 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3015 if (inmode
== CCmode
)
3019 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3027 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3030 fcmov_comparison_operator (op
, mode
)
3032 enum machine_mode mode
;
3034 enum machine_mode inmode
;
3035 enum rtx_code code
= GET_CODE (op
);
3036 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3038 if (GET_RTX_CLASS (code
) != '<')
3040 inmode
= GET_MODE (XEXP (op
, 0));
3041 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3043 enum rtx_code second_code
, bypass_code
;
3044 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3045 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3047 code
= ix86_fp_compare_code_to_integer (code
);
3049 /* i387 supports just limited amount of conditional codes. */
3052 case LTU
: case GTU
: case LEU
: case GEU
:
3053 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3056 case ORDERED
: case UNORDERED
:
3064 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3067 promotable_binary_operator (op
, mode
)
3069 enum machine_mode mode ATTRIBUTE_UNUSED
;
3071 switch (GET_CODE (op
))
3074 /* Modern CPUs have same latency for HImode and SImode multiply,
3075 but 386 and 486 do HImode multiply faster. */
3076 return ix86_cpu
> PROCESSOR_I486
;
3088 /* Nearly general operand, but accept any const_double, since we wish
3089 to be able to drop them into memory rather than have them get pulled
3093 cmp_fp_expander_operand (op
, mode
)
3095 enum machine_mode mode
;
3097 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3099 if (GET_CODE (op
) == CONST_DOUBLE
)
3101 return general_operand (op
, mode
);
3104 /* Match an SI or HImode register for a zero_extract. */
3107 ext_register_operand (op
, mode
)
3109 enum machine_mode mode ATTRIBUTE_UNUSED
;
3112 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3113 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3116 if (!register_operand (op
, VOIDmode
))
3119 /* Be curefull to accept only registers having upper parts. */
3120 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3121 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3124 /* Return 1 if this is a valid binary floating-point operation.
3125 OP is the expression matched, and MODE is its mode. */
3128 binary_fp_operator (op
, mode
)
3130 enum machine_mode mode
;
3132 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3135 switch (GET_CODE (op
))
3141 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3149 mult_operator(op
, mode
)
3151 enum machine_mode mode ATTRIBUTE_UNUSED
;
3153 return GET_CODE (op
) == MULT
;
3157 div_operator(op
, mode
)
3159 enum machine_mode mode ATTRIBUTE_UNUSED
;
3161 return GET_CODE (op
) == DIV
;
3165 arith_or_logical_operator (op
, mode
)
3167 enum machine_mode mode
;
3169 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3170 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3171 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3174 /* Returns 1 if OP is memory operand with a displacement. */
3177 memory_displacement_operand (op
, mode
)
3179 enum machine_mode mode
;
3181 struct ix86_address parts
;
3183 if (! memory_operand (op
, mode
))
3186 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3189 return parts
.disp
!= NULL_RTX
;
3192 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3193 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3195 ??? It seems likely that this will only work because cmpsi is an
3196 expander, and no actual insns use this. */
3199 cmpsi_operand (op
, mode
)
3201 enum machine_mode mode
;
3203 if (nonimmediate_operand (op
, mode
))
3206 if (GET_CODE (op
) == AND
3207 && GET_MODE (op
) == SImode
3208 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3209 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3210 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3211 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3212 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3213 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3219 /* Returns 1 if OP is memory operand that can not be represented by the
3223 long_memory_operand (op
, mode
)
3225 enum machine_mode mode
;
3227 if (! memory_operand (op
, mode
))
3230 return memory_address_length (op
) != 0;
3233 /* Return nonzero if the rtx is known aligned. */
3236 aligned_operand (op
, mode
)
3238 enum machine_mode mode
;
3240 struct ix86_address parts
;
3242 if (!general_operand (op
, mode
))
3245 /* Registers and immediate operands are always "aligned". */
3246 if (GET_CODE (op
) != MEM
)
3249 /* Don't even try to do any aligned optimizations with volatiles. */
3250 if (MEM_VOLATILE_P (op
))
3255 /* Pushes and pops are only valid on the stack pointer. */
3256 if (GET_CODE (op
) == PRE_DEC
3257 || GET_CODE (op
) == POST_INC
)
3260 /* Decode the address. */
3261 if (! ix86_decompose_address (op
, &parts
))
3264 /* Look for some component that isn't known to be aligned. */
3268 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3273 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3278 if (GET_CODE (parts
.disp
) != CONST_INT
3279 || (INTVAL (parts
.disp
) & 3) != 0)
3283 /* Didn't find one -- this must be an aligned address. */
3287 /* Return true if the constant is something that can be loaded with
3288 a special instruction. Only handle 0.0 and 1.0; others are less
3292 standard_80387_constant_p (x
)
3295 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3297 /* Note that on the 80387, other constants, such as pi, that we should support
3298 too. On some machines, these are much slower to load as standard constant,
3299 than to load from doubles in memory. */
3300 if (x
== CONST0_RTX (GET_MODE (x
)))
3302 if (x
== CONST1_RTX (GET_MODE (x
)))
3307 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3310 standard_sse_constant_p (x
)
3313 if (GET_CODE (x
) != CONST_DOUBLE
)
3315 return (x
== CONST0_RTX (GET_MODE (x
)));
3318 /* Returns 1 if OP contains a symbol reference */
3321 symbolic_reference_mentioned_p (op
)
3324 register const char *fmt
;
3327 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3330 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3331 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3337 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3338 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3342 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3349 /* Return 1 if it is appropriate to emit `ret' instructions in the
3350 body of a function. Do this only if the epilogue is simple, needing a
3351 couple of insns. Prior to reloading, we can't tell how many registers
3352 must be saved, so return 0 then. Return 0 if there is no frame
3353 marker to de-allocate.
3355 If NON_SAVING_SETJMP is defined and true, then it is not possible
3356 for the epilogue to be simple, so return 0. This is a special case
3357 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3358 until final, but jump_optimize may need to know sooner if a
3362 ix86_can_use_return_insn_p ()
3364 struct ix86_frame frame
;
3366 #ifdef NON_SAVING_SETJMP
3367 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3370 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
3371 if (profile_block_flag
== 2)
3375 if (! reload_completed
|| frame_pointer_needed
)
3378 /* Don't allow more than 32 pop, since that's all we can do
3379 with one instruction. */
3380 if (current_function_pops_args
3381 && current_function_args_size
>= 32768)
3384 ix86_compute_frame_layout (&frame
);
3385 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3388 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3390 x86_64_sign_extended_value (value
)
3393 switch (GET_CODE (value
))
3395 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3396 to be at least 32 and this all acceptable constants are
3397 represented as CONST_INT. */
3399 if (HOST_BITS_PER_WIDE_INT
== 32)
3403 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3404 return trunc_int_for_mode (val
, SImode
) == val
;
3408 /* For certain code models, the symbolic references are known to fit. */
3410 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
3412 /* For certain code models, the code is near as well. */
3414 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
3416 /* We also may accept the offsetted memory references in certain special
3419 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
3420 && XVECLEN (XEXP (value
, 0), 0) == 1
3421 && XINT (XEXP (value
, 0), 1) == 15)
3423 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3425 rtx op1
= XEXP (XEXP (value
, 0), 0);
3426 rtx op2
= XEXP (XEXP (value
, 0), 1);
3427 HOST_WIDE_INT offset
;
3429 if (ix86_cmodel
== CM_LARGE
)
3431 if (GET_CODE (op2
) != CONST_INT
)
3433 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3434 switch (GET_CODE (op1
))
3437 /* For CM_SMALL assume that latest object is 1MB before
3438 end of 31bits boundary. We may also accept pretty
3439 large negative constants knowing that all objects are
3440 in the positive half of address space. */
3441 if (ix86_cmodel
== CM_SMALL
3442 && offset
< 1024*1024*1024
3443 && trunc_int_for_mode (offset
, SImode
) == offset
)
3445 /* For CM_KERNEL we know that all object resist in the
3446 negative half of 32bits address space. We may not
3447 accept negative offsets, since they may be just off
3448 and we may accept pretty large possitive ones. */
3449 if (ix86_cmodel
== CM_KERNEL
3451 && trunc_int_for_mode (offset
, SImode
) == offset
)
3455 /* These conditions are similar to SYMBOL_REF ones, just the
3456 constraints for code models differ. */
3457 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3458 && offset
< 1024*1024*1024
3459 && trunc_int_for_mode (offset
, SImode
) == offset
)
3461 if (ix86_cmodel
== CM_KERNEL
3463 && trunc_int_for_mode (offset
, SImode
) == offset
)
3476 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3478 x86_64_zero_extended_value (value
)
3481 switch (GET_CODE (value
))
3484 if (HOST_BITS_PER_WIDE_INT
== 32)
3485 return (GET_MODE (value
) == VOIDmode
3486 && !CONST_DOUBLE_HIGH (value
));
3490 if (HOST_BITS_PER_WIDE_INT
== 32)
3491 return INTVAL (value
) >= 0;
3493 return !(INTVAL (value
) & ~(HOST_WIDE_INT
)0xffffffff);
3496 /* For certain code models, the symbolic references are known to fit. */
3498 return ix86_cmodel
== CM_SMALL
;
3500 /* For certain code models, the code is near as well. */
3502 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3504 /* We also may accept the offsetted memory references in certain special
3507 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3509 rtx op1
= XEXP (XEXP (value
, 0), 0);
3510 rtx op2
= XEXP (XEXP (value
, 0), 1);
3512 if (ix86_cmodel
== CM_LARGE
)
3514 switch (GET_CODE (op1
))
3518 /* For small code model we may accept pretty large possitive
3519 offsets, since one bit is available for free. Negative
3520 offsets are limited by the size of NULL pointer area
3521 specified by the ABI. */
3522 if (ix86_cmodel
== CM_SMALL
3523 && GET_CODE (op2
) == CONST_INT
3524 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3525 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3528 /* ??? For the kernel, we may accept adjustment of
3529 -0x10000000, since we know that it will just convert
3530 negative address space to possitive, but perhaps this
3531 is not worthwhile. */
3534 /* These conditions are similar to SYMBOL_REF ones, just the
3535 constraints for code models differ. */
3536 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3537 && GET_CODE (op2
) == CONST_INT
3538 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3539 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3553 /* Value should be nonzero if functions must have frame pointers.
3554 Zero means the frame pointer need not be set up (and parms may
3555 be accessed via the stack pointer) in functions that seem suitable. */
3558 ix86_frame_pointer_required ()
3560 /* If we accessed previous frames, then the generated code expects
3561 to be able to access the saved ebp value in our frame. */
3562 if (cfun
->machine
->accesses_prev_frame
)
3565 /* Several x86 os'es need a frame pointer for other reasons,
3566 usually pertaining to setjmp. */
3567 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3570 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3571 the frame pointer by default. Turn it back on now if we've not
3572 got a leaf function. */
3573 if (TARGET_OMIT_LEAF_FRAME_POINTER
&& ! leaf_function_p ())
3579 /* Record that the current function accesses previous call frames. */
3582 ix86_setup_frame_addresses ()
3584 cfun
->machine
->accesses_prev_frame
= 1;
3587 static char pic_label_name
[32];
3589 /* This function generates code for -fpic that loads %ebx with
3590 the return address of the caller and then returns. */
3593 ix86_asm_file_end (file
)
3598 if (! TARGET_DEEP_BRANCH_PREDICTION
|| pic_label_name
[0] == 0)
3601 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3602 to updating relocations to a section being discarded such that this
3603 doesn't work. Ought to detect this at configure time. */
3605 /* The trick here is to create a linkonce section containing the
3606 pic label thunk, but to refer to it with an internal label.
3607 Because the label is internal, we don't have inter-dso name
3608 binding issues on hosts that don't support ".hidden".
3610 In order to use these macros, however, we must create a fake
3612 if (targetm
.have_named_sections
)
3614 tree decl
= build_decl (FUNCTION_DECL
,
3615 get_identifier ("i686.get_pc_thunk"),
3617 DECL_ONE_ONLY (decl
) = 1;
3618 UNIQUE_SECTION (decl
, 0);
3619 named_section (decl
, NULL
);
3626 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3627 internal (non-global) label that's being emitted, it didn't make
3628 sense to have .type information for local labels. This caused
3629 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3630 me debug info for a label that you're declaring non-global?) this
3631 was changed to call ASM_OUTPUT_LABEL() instead. */
3633 ASM_OUTPUT_LABEL (file
, pic_label_name
);
3635 xops
[0] = pic_offset_table_rtx
;
3636 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3637 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3638 output_asm_insn ("ret", xops
);
3642 load_pic_register ()
3649 gotsym
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
3651 if (TARGET_DEEP_BRANCH_PREDICTION
)
3653 if (! pic_label_name
[0])
3654 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
3655 pclab
= gen_rtx_MEM (QImode
, gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
));
3659 pclab
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
3662 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx
, pclab
));
3664 if (! TARGET_DEEP_BRANCH_PREDICTION
)
3665 emit_insn (gen_popsi1 (pic_offset_table_rtx
));
3667 emit_insn (gen_prologue_set_got (pic_offset_table_rtx
, gotsym
, pclab
));
3670 /* Generate an "push" pattern for input ARG. */
3676 return gen_rtx_SET (VOIDmode
,
3678 gen_rtx_PRE_DEC (Pmode
,
3679 stack_pointer_rtx
)),
3683 /* Return 1 if we need to save REGNO. */
3685 ix86_save_reg (regno
, maybe_eh_return
)
3687 int maybe_eh_return
;
3691 && regno
== PIC_OFFSET_TABLE_REGNUM
3692 && (current_function_uses_pic_offset_table
3693 || current_function_uses_const_pool
3694 || current_function_calls_eh_return
))
3697 if (current_function_calls_eh_return
&& maybe_eh_return
)
3702 unsigned test
= EH_RETURN_DATA_REGNO(i
);
3703 if (test
== INVALID_REGNUM
)
3705 if (test
== (unsigned) regno
)
3710 return (regs_ever_live
[regno
]
3711 && !call_used_regs
[regno
]
3712 && !fixed_regs
[regno
]
3713 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
3716 /* Return number of registers to be saved on the stack. */
3724 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
3725 if (ix86_save_reg (regno
, true))
3730 /* Return the offset between two registers, one to be eliminated, and the other
3731 its replacement, at the start of a routine. */
3734 ix86_initial_elimination_offset (from
, to
)
3738 struct ix86_frame frame
;
3739 ix86_compute_frame_layout (&frame
);
3741 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3742 return frame
.hard_frame_pointer_offset
;
3743 else if (from
== FRAME_POINTER_REGNUM
3744 && to
== HARD_FRAME_POINTER_REGNUM
)
3745 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
3748 if (to
!= STACK_POINTER_REGNUM
)
3750 else if (from
== ARG_POINTER_REGNUM
)
3751 return frame
.stack_pointer_offset
;
3752 else if (from
!= FRAME_POINTER_REGNUM
)
3755 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
3759 /* Fill structure ix86_frame about frame of currently computed function. */
3762 ix86_compute_frame_layout (frame
)
3763 struct ix86_frame
*frame
;
3765 HOST_WIDE_INT total_size
;
3766 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
3768 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
3769 HOST_WIDE_INT size
= get_frame_size ();
3771 frame
->nregs
= ix86_nsaved_regs ();
3774 /* Skip return value and save base pointer. */
3775 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
3777 frame
->hard_frame_pointer_offset
= offset
;
3779 /* Do some sanity checking of stack_alignment_needed and
3780 preferred_alignment, since i386 port is the only using those features
3781 that may break easilly. */
3783 if (size
&& !stack_alignment_needed
)
3785 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
3787 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
3789 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
3792 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
3793 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
3795 /* Register save area */
3796 offset
+= frame
->nregs
* UNITS_PER_WORD
;
3799 if (ix86_save_varrargs_registers
)
3801 offset
+= X86_64_VARARGS_SIZE
;
3802 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
3805 frame
->va_arg_size
= 0;
3807 /* Align start of frame for local function. */
3808 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
3809 & -stack_alignment_needed
) - offset
;
3811 offset
+= frame
->padding1
;
3813 /* Frame pointer points here. */
3814 frame
->frame_pointer_offset
= offset
;
3818 /* Add outgoing arguments area. */
3819 if (ACCUMULATE_OUTGOING_ARGS
)
3821 offset
+= current_function_outgoing_args_size
;
3822 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
3825 frame
->outgoing_arguments_size
= 0;
3827 /* Align stack boundary. */
3828 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
3829 & -preferred_alignment
) - offset
;
3831 offset
+= frame
->padding2
;
3833 /* We've reached end of stack frame. */
3834 frame
->stack_pointer_offset
= offset
;
3836 /* Size prologue needs to allocate. */
3837 frame
->to_allocate
=
3838 (size
+ frame
->padding1
+ frame
->padding2
3839 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
3841 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
3842 && current_function_is_leaf
)
3844 frame
->red_zone_size
= frame
->to_allocate
;
3845 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
3846 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
3849 frame
->red_zone_size
= 0;
3850 frame
->to_allocate
-= frame
->red_zone_size
;
3851 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
3853 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
3854 fprintf (stderr
, "size: %i\n", size
);
3855 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
3856 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
3857 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
3858 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
3859 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
3860 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
3861 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
3862 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
3863 frame
->hard_frame_pointer_offset
);
3864 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
3868 /* Emit code to save registers in the prologue. */
3871 ix86_emit_save_regs ()
3876 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
3877 if (ix86_save_reg (regno
, true))
3879 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
3880 RTX_FRAME_RELATED_P (insn
) = 1;
3884 /* Emit code to save registers using MOV insns. First register
3885 is restored from POINTER + OFFSET. */
3887 ix86_emit_save_regs_using_mov (pointer
, offset
)
3889 HOST_WIDE_INT offset
;
3894 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3895 if (ix86_save_reg (regno
, true))
3897 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
3899 gen_rtx_REG (Pmode
, regno
));
3900 RTX_FRAME_RELATED_P (insn
) = 1;
3901 offset
+= UNITS_PER_WORD
;
3905 /* Expand the prologue into a bunch of separate insns. */
3908 ix86_expand_prologue ()
3911 int pic_reg_used
= (flag_pic
&& (current_function_uses_pic_offset_table
3912 || current_function_uses_const_pool
)
3914 struct ix86_frame frame
;
3916 HOST_WIDE_INT allocate
;
3920 use_fast_prologue_epilogue
3921 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
3922 if (TARGET_PROLOGUE_USING_MOVE
)
3923 use_mov
= use_fast_prologue_epilogue
;
3925 ix86_compute_frame_layout (&frame
);
3927 /* Note: AT&T enter does NOT have reversed args. Enter is probably
3928 slower on all targets. Also sdb doesn't like it. */
3930 if (frame_pointer_needed
)
3932 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
3933 RTX_FRAME_RELATED_P (insn
) = 1;
3935 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3936 RTX_FRAME_RELATED_P (insn
) = 1;
3939 allocate
= frame
.to_allocate
;
3940 /* In case we are dealing only with single register and empty frame,
3941 push is equivalent of the mov+add sequence. */
3942 if (allocate
== 0 && frame
.nregs
<= 1)
3946 ix86_emit_save_regs ();
3948 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
3952 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
3954 insn
= emit_insn (gen_pro_epilogue_adjust_stack
3955 (stack_pointer_rtx
, stack_pointer_rtx
,
3956 GEN_INT (-allocate
)));
3957 RTX_FRAME_RELATED_P (insn
) = 1;
3961 /* ??? Is this only valid for Win32? */
3968 arg0
= gen_rtx_REG (SImode
, 0);
3969 emit_move_insn (arg0
, GEN_INT (allocate
));
3971 sym
= gen_rtx_MEM (FUNCTION_MODE
,
3972 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
3973 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
3975 CALL_INSN_FUNCTION_USAGE (insn
)
3976 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
3977 CALL_INSN_FUNCTION_USAGE (insn
));
3981 if (!frame_pointer_needed
|| !frame
.to_allocate
)
3982 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
3984 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
3985 -frame
.nregs
* UNITS_PER_WORD
);
3988 #ifdef SUBTARGET_PROLOGUE
3993 load_pic_register ();
3995 /* If we are profiling, make sure no instructions are scheduled before
3996 the call to mcount. However, if -fpic, the above call will have
3998 if ((profile_flag
|| profile_block_flag
) && ! pic_reg_used
)
3999 emit_insn (gen_blockage ());
4002 /* Emit code to restore saved registers using MOV insns. First register
4003 is restored from POINTER + OFFSET. */
4005 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4008 int maybe_eh_return
;
4012 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4013 if (ix86_save_reg (regno
, maybe_eh_return
))
4015 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4016 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4018 offset
+= UNITS_PER_WORD
;
4022 /* Restore function stack, frame, and registers. */
4025 ix86_expand_epilogue (style
)
4029 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4030 struct ix86_frame frame
;
4031 HOST_WIDE_INT offset
;
4033 ix86_compute_frame_layout (&frame
);
4035 /* Calculate start of saved registers relative to ebp. Special care
4036 must be taken for the normal return case of a function using
4037 eh_return: the eax and edx registers are marked as saved, but not
4038 restored along this path. */
4039 offset
= frame
.nregs
;
4040 if (current_function_calls_eh_return
&& style
!= 2)
4042 offset
*= -UNITS_PER_WORD
;
4044 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
4045 if (profile_block_flag
== 2)
4047 FUNCTION_BLOCK_PROFILER_EXIT
;
4051 /* If we're only restoring one register and sp is not valid then
4052 using a move instruction to restore the register since it's
4053 less work than reloading sp and popping the register.
4055 The default code result in stack adjustment using add/lea instruction,
4056 while this code results in LEAVE instruction (or discrete equivalent),
4057 so it is profitable in some other cases as well. Especially when there
4058 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4059 and there is exactly one register to pop. This heruistic may need some
4060 tuning in future. */
4061 if ((!sp_valid
&& frame
.nregs
<= 1)
4062 || (TARGET_EPILOGUE_USING_MOVE
4063 && use_fast_prologue_epilogue
4064 && (frame
.nregs
> 1 || frame
.to_allocate
))
4065 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4066 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4067 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4068 || current_function_calls_eh_return
)
4070 /* Restore registers. We can use ebp or esp to address the memory
4071 locations. If both are available, default to ebp, since offsets
4072 are known to be small. Only exception is esp pointing directly to the
4073 end of block of saved registers, where we may simplify addressing
4076 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4077 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4078 frame
.to_allocate
, style
== 2);
4080 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4081 offset
, style
== 2);
4083 /* eh_return epilogues need %ecx added to the stack pointer. */
4086 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4088 if (frame_pointer_needed
)
4090 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4091 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4092 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4094 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4095 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4097 emit_insn (gen_pro_epilogue_adjust_stack
4098 (stack_pointer_rtx
, sa
, const0_rtx
));
4102 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4103 tmp
= plus_constant (tmp
, (frame
.to_allocate
4104 + frame
.nregs
* UNITS_PER_WORD
));
4105 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4108 else if (!frame_pointer_needed
)
4109 emit_insn (gen_pro_epilogue_adjust_stack
4110 (stack_pointer_rtx
, stack_pointer_rtx
,
4111 GEN_INT (frame
.to_allocate
4112 + frame
.nregs
* UNITS_PER_WORD
)));
4113 /* If not an i386, mov & pop is faster than "leave". */
4114 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4115 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4118 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4119 hard_frame_pointer_rtx
,
4122 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4124 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4129 /* First step is to deallocate the stack frame so that we can
4130 pop the registers. */
4133 if (!frame_pointer_needed
)
4135 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4136 hard_frame_pointer_rtx
,
4139 else if (frame
.to_allocate
)
4140 emit_insn (gen_pro_epilogue_adjust_stack
4141 (stack_pointer_rtx
, stack_pointer_rtx
,
4142 GEN_INT (frame
.to_allocate
)));
4144 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4145 if (ix86_save_reg (regno
, false))
4148 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4150 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4152 if (frame_pointer_needed
)
4154 /* Leave results in shorter depdendancy chains on CPUs that are
4155 able to grok it fast. */
4156 if (TARGET_USE_LEAVE
)
4157 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4158 else if (TARGET_64BIT
)
4159 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4161 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4165 /* Sibcall epilogues don't want a return instruction. */
4169 if (current_function_pops_args
&& current_function_args_size
)
4171 rtx popc
= GEN_INT (current_function_pops_args
);
4173 /* i386 can only pop 64K bytes. If asked to pop more, pop
4174 return address, do explicit add, and jump indirectly to the
4177 if (current_function_pops_args
>= 65536)
4179 rtx ecx
= gen_rtx_REG (SImode
, 2);
4181 /* There are is no "pascal" calling convention in 64bit ABI. */
4185 emit_insn (gen_popsi1 (ecx
));
4186 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4187 emit_jump_insn (gen_return_indirect_internal (ecx
));
4190 emit_jump_insn (gen_return_pop_internal (popc
));
4193 emit_jump_insn (gen_return_internal ());
4196 /* Extract the parts of an RTL expression that is a valid memory address
4197 for an instruction. Return false if the structure of the address is
4201 ix86_decompose_address (addr
, out
)
4203 struct ix86_address
*out
;
4205 rtx base
= NULL_RTX
;
4206 rtx index
= NULL_RTX
;
4207 rtx disp
= NULL_RTX
;
4208 HOST_WIDE_INT scale
= 1;
4209 rtx scale_rtx
= NULL_RTX
;
4211 if (GET_CODE (addr
) == REG
|| GET_CODE (addr
) == SUBREG
)
4213 else if (GET_CODE (addr
) == PLUS
)
4215 rtx op0
= XEXP (addr
, 0);
4216 rtx op1
= XEXP (addr
, 1);
4217 enum rtx_code code0
= GET_CODE (op0
);
4218 enum rtx_code code1
= GET_CODE (op1
);
4220 if (code0
== REG
|| code0
== SUBREG
)
4222 if (code1
== REG
|| code1
== SUBREG
)
4223 index
= op0
, base
= op1
; /* index + base */
4225 base
= op0
, disp
= op1
; /* base + displacement */
4227 else if (code0
== MULT
)
4229 index
= XEXP (op0
, 0);
4230 scale_rtx
= XEXP (op0
, 1);
4231 if (code1
== REG
|| code1
== SUBREG
)
4232 base
= op1
; /* index*scale + base */
4234 disp
= op1
; /* index*scale + disp */
4236 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4238 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4239 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4240 base
= XEXP (op0
, 1);
4243 else if (code0
== PLUS
)
4245 index
= XEXP (op0
, 0); /* index + base + disp */
4246 base
= XEXP (op0
, 1);
4252 else if (GET_CODE (addr
) == MULT
)
4254 index
= XEXP (addr
, 0); /* index*scale */
4255 scale_rtx
= XEXP (addr
, 1);
4257 else if (GET_CODE (addr
) == ASHIFT
)
4261 /* We're called for lea too, which implements ashift on occasion. */
4262 index
= XEXP (addr
, 0);
4263 tmp
= XEXP (addr
, 1);
4264 if (GET_CODE (tmp
) != CONST_INT
)
4266 scale
= INTVAL (tmp
);
4267 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4272 disp
= addr
; /* displacement */
4274 /* Extract the integral value of scale. */
4277 if (GET_CODE (scale_rtx
) != CONST_INT
)
4279 scale
= INTVAL (scale_rtx
);
4282 /* Allow arg pointer and stack pointer as index if there is not scaling */
4283 if (base
&& index
&& scale
== 1
4284 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4285 || index
== stack_pointer_rtx
))
4292 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4293 if ((base
== hard_frame_pointer_rtx
4294 || base
== frame_pointer_rtx
4295 || base
== arg_pointer_rtx
) && !disp
)
4298 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4299 Avoid this by transforming to [%esi+0]. */
4300 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4301 && base
&& !index
&& !disp
4303 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4306 /* Special case: encode reg+reg instead of reg*2. */
4307 if (!base
&& index
&& scale
&& scale
== 2)
4308 base
= index
, scale
= 1;
4310 /* Special case: scaling cannot be encoded without base or displacement. */
4311 if (!base
&& !disp
&& index
&& scale
!= 1)
4322 /* Return cost of the memory address x.
4323 For i386, it is better to use a complex address than let gcc copy
4324 the address into a reg and make a new pseudo. But not if the address
4325 requires to two regs - that would mean more pseudos with longer
4328 ix86_address_cost (x
)
4331 struct ix86_address parts
;
4334 if (!ix86_decompose_address (x
, &parts
))
4337 /* More complex memory references are better. */
4338 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4341 /* Attempt to minimize number of registers in the address. */
4343 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4345 && (!REG_P (parts
.index
)
4346 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4350 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4352 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4353 && parts
.base
!= parts
.index
)
4356 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4357 since it's predecode logic can't detect the length of instructions
4358 and it degenerates to vector decoded. Increase cost of such
4359 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4360 to split such addresses or even refuse such addresses at all.
4362 Following addressing modes are affected:
4367 The first and last case may be avoidable by explicitly coding the zero in
4368 memory address, but I don't have AMD-K6 machine handy to check this
4372 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4373 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4374 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4380 /* If X is a machine specific address (i.e. a symbol or label being
4381 referenced as a displacement from the GOT implemented using an
4382 UNSPEC), then return the base term. Otherwise return X. */
4385 ix86_find_base_term (x
)
4392 if (GET_CODE (x
) != CONST
)
4395 if (GET_CODE (term
) == PLUS
4396 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4397 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4398 term
= XEXP (term
, 0);
4399 if (GET_CODE (term
) != UNSPEC
4400 || XVECLEN (term
, 0) != 1
4401 || XINT (term
, 1) != 15)
4404 term
= XVECEXP (term
, 0, 0);
4406 if (GET_CODE (term
) != SYMBOL_REF
4407 && GET_CODE (term
) != LABEL_REF
)
4413 if (GET_CODE (x
) != PLUS
4414 || XEXP (x
, 0) != pic_offset_table_rtx
4415 || GET_CODE (XEXP (x
, 1)) != CONST
)
4418 term
= XEXP (XEXP (x
, 1), 0);
4420 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
4421 term
= XEXP (term
, 0);
4423 if (GET_CODE (term
) != UNSPEC
4424 || XVECLEN (term
, 0) != 1
4425 || XINT (term
, 1) != 7)
4428 term
= XVECEXP (term
, 0, 0);
4430 if (GET_CODE (term
) != SYMBOL_REF
4431 && GET_CODE (term
) != LABEL_REF
)
4437 /* Determine if a given CONST RTX is a valid memory displacement
4441 legitimate_pic_address_disp_p (disp
)
4444 /* In 64bit mode we can allow direct addresses of symbols and labels
4445 when they are not dynamic symbols. */
4449 if (GET_CODE (disp
) == CONST
)
4451 /* ??? Handle PIC code models */
4452 if (GET_CODE (x
) == PLUS
4453 && (GET_CODE (XEXP (x
, 1)) == CONST_INT
4454 && ix86_cmodel
== CM_SMALL_PIC
4455 && INTVAL (XEXP (x
, 1)) < 1024*1024*1024
4456 && INTVAL (XEXP (x
, 1)) > -1024*1024*1024))
4458 if (local_symbolic_operand (x
, Pmode
))
4461 if (GET_CODE (disp
) != CONST
)
4463 disp
= XEXP (disp
, 0);
4467 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4468 of GOT tables. We should not need these anyway. */
4469 if (GET_CODE (disp
) != UNSPEC
4470 || XVECLEN (disp
, 0) != 1
4471 || XINT (disp
, 1) != 15)
4474 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
4475 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
4480 if (GET_CODE (disp
) == PLUS
)
4482 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
4484 disp
= XEXP (disp
, 0);
4487 if (GET_CODE (disp
) != UNSPEC
4488 || XVECLEN (disp
, 0) != 1)
4491 /* Must be @GOT or @GOTOFF. */
4492 switch (XINT (disp
, 1))
4495 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
4497 case 7: /* @GOTOFF */
4498 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4504 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4505 memory address for an instruction. The MODE argument is the machine mode
4506 for the MEM expression that wants to use this address.
4508 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4509 convert common non-canonical forms to canonical form so that they will
4513 legitimate_address_p (mode
, addr
, strict
)
4514 enum machine_mode mode
;
4518 struct ix86_address parts
;
4519 rtx base
, index
, disp
;
4520 HOST_WIDE_INT scale
;
4521 const char *reason
= NULL
;
4522 rtx reason_rtx
= NULL_RTX
;
4524 if (TARGET_DEBUG_ADDR
)
4527 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4528 GET_MODE_NAME (mode
), strict
);
4532 if (! ix86_decompose_address (addr
, &parts
))
4534 reason
= "decomposition failed";
4539 index
= parts
.index
;
4541 scale
= parts
.scale
;
4543 /* Validate base register.
4545 Don't allow SUBREG's here, it can lead to spill failures when the base
4546 is one word out of a two word structure, which is represented internally
4553 if (GET_CODE (base
) != REG
)
4555 reason
= "base is not a register";
4559 if (GET_MODE (base
) != Pmode
)
4561 reason
= "base is not in Pmode";
4565 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (base
))
4566 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (base
)))
4568 reason
= "base is not valid";
4573 /* Validate index register.
4575 Don't allow SUBREG's here, it can lead to spill failures when the index
4576 is one word out of a two word structure, which is represented internally
4583 if (GET_CODE (index
) != REG
)
4585 reason
= "index is not a register";
4589 if (GET_MODE (index
) != Pmode
)
4591 reason
= "index is not in Pmode";
4595 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (index
))
4596 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (index
)))
4598 reason
= "index is not valid";
4603 /* Validate scale factor. */
4606 reason_rtx
= GEN_INT (scale
);
4609 reason
= "scale without index";
4613 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
4615 reason
= "scale is not a valid multiplier";
4620 /* Validate displacement. */
4625 if (!CONSTANT_ADDRESS_P (disp
))
4627 reason
= "displacement is not constant";
4633 if (!x86_64_sign_extended_value (disp
))
4635 reason
= "displacement is out of range";
4641 if (GET_CODE (disp
) == CONST_DOUBLE
)
4643 reason
= "displacement is a const_double";
4648 if (flag_pic
&& SYMBOLIC_CONST (disp
))
4650 if (TARGET_64BIT
&& (index
|| base
))
4652 reason
= "non-constant pic memory reference";
4655 if (! legitimate_pic_address_disp_p (disp
))
4657 reason
= "displacement is an invalid pic construct";
4661 /* This code used to verify that a symbolic pic displacement
4662 includes the pic_offset_table_rtx register.
4664 While this is good idea, unfortunately these constructs may
4665 be created by "adds using lea" optimization for incorrect
4674 This code is nonsensical, but results in addressing
4675 GOT table with pic_offset_table_rtx base. We can't
4676 just refuse it easilly, since it gets matched by
4677 "addsi3" pattern, that later gets split to lea in the
4678 case output register differs from input. While this
4679 can be handled by separate addsi pattern for this case
4680 that never results in lea, this seems to be easier and
4681 correct fix for crash to disable this test. */
4683 else if (HALF_PIC_P ())
4685 if (! HALF_PIC_ADDRESS_P (disp
)
4686 || (base
!= NULL_RTX
|| index
!= NULL_RTX
))
4688 reason
= "displacement is an invalid half-pic reference";
4694 /* Everything looks valid. */
4695 if (TARGET_DEBUG_ADDR
)
4696 fprintf (stderr
, "Success.\n");
4700 if (TARGET_DEBUG_ADDR
)
4702 fprintf (stderr
, "Error: %s\n", reason
);
4703 debug_rtx (reason_rtx
);
4708 /* Return an unique alias set for the GOT. */
4710 static HOST_WIDE_INT
4711 ix86_GOT_alias_set ()
4713 static HOST_WIDE_INT set
= -1;
4715 set
= new_alias_set ();
4719 /* Return a legitimate reference for ORIG (an address) using the
4720 register REG. If REG is 0, a new pseudo is generated.
4722 There are two types of references that must be handled:
4724 1. Global data references must load the address from the GOT, via
4725 the PIC reg. An insn is emitted to do this load, and the reg is
4728 2. Static data references, constant pool addresses, and code labels
4729 compute the address as an offset from the GOT, whose base is in
4730 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4731 differentiate them from global data objects. The returned
4732 address is the PIC reg + an unspec constant.
4734 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4735 reg also appears in the address. */
/* NOTE(review): this chunk is a lossy extraction -- each original i386.c
   line is split across several rows and intervening lines (braces,
   declarations, returns) are missing.  Only comments are added here;
   the code text is left byte-identical.  */
/* legitimize_pic_address (orig, reg): convert symbolic address ORIG into
   a PIC-legitimate form, using REG as scratch/result when provided.
   Local symbols become pic_reg + @GOTOFF (UNSPEC 7); other SYMBOL_REFs
   are loaded through the GOT (UNSPEC 15 / UNSPEC 6); CONST/PLUS forms
   are decomposed and legitimized recursively.  */
4738 legitimize_pic_address (orig
, reg
)
/* Local (static) symbols: addressable as pic_reg + @GOTOFF.  */
4746 if (local_symbolic_operand (addr
, Pmode
))
4748 /* In 64bit mode we can address such objects directly. */
4753 /* This symbol may be referenced via a displacement from the PIC
4754 base address (@GOTOFF). */
4756 current_function_uses_pic_offset_table
= 1;
/* UNSPEC number 7 marks @GOTOFF references (see output_pic_addr_const).  */
4757 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), 7);
4758 new = gen_rtx_CONST (Pmode
, new);
4759 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
4763 emit_move_insn (reg
, new);
/* Global SYMBOL_REF: presumably the 64-bit @GOTPCREL path (UNSPEC 15,
   printed as "@GOTPCREL(%RIP)") -- TODO confirm against the missing
   surrounding conditionals.  */
4768 else if (GET_CODE (addr
) == SYMBOL_REF
)
4772 current_function_uses_pic_offset_table
= 1;
4773 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), 15);
4774 new = gen_rtx_CONST (Pmode
, new);
4775 new = gen_rtx_MEM (Pmode
, new);
4776 RTX_UNCHANGING_P (new) = 1;
4777 set_mem_alias_set (new, ix86_GOT_alias_set ());
4780 reg
= gen_reg_rtx (Pmode
);
4781 /* Use directly gen_movsi, otherwise the address is loaded
4782 into register for CSE. We don't want to CSE this addresses,
4783 instead we CSE addresses from the GOT table, so skip this. */
4784 emit_insn (gen_movsi (reg
, new));
4789 /* This symbol must be referenced via a load from the
4790 Global Offset Table (@GOT). */
4792 current_function_uses_pic_offset_table
= 1;
/* UNSPEC number 6 marks @GOT references.  */
4793 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), 6);
4794 new = gen_rtx_CONST (Pmode
, new);
4795 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
4796 new = gen_rtx_MEM (Pmode
, new);
4797 RTX_UNCHANGING_P (new) = 1;
4798 set_mem_alias_set (new, ix86_GOT_alias_set ());
4801 reg
= gen_reg_rtx (Pmode
);
4802 emit_move_insn (reg
, new);
/* Non-symbol cases: strip a CONST wrapper and handle UNSPEC / PLUS.  */
4808 if (GET_CODE (addr
) == CONST
)
4810 addr
= XEXP (addr
, 0);
4811 if (GET_CODE (addr
) == UNSPEC
)
4813 /* Check that the unspec is one of the ones we generate? */
4815 else if (GET_CODE (addr
) != PLUS
)
4818 if (GET_CODE (addr
) == PLUS
)
4820 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
4822 /* Check first to see if this is a constant offset from a @GOTOFF
4823 symbol reference. */
4824 if (local_symbolic_operand (op0
, Pmode
)
4825 && GET_CODE (op1
) == CONST_INT
)
4829 current_function_uses_pic_offset_table
= 1;
4830 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
), 7);
4831 new = gen_rtx_PLUS (Pmode
, new, op1
);
4832 new = gen_rtx_CONST (Pmode
, new);
4833 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
4837 emit_move_insn (reg
, new);
4843 /* ??? We need to limit offsets here. */
/* General PLUS: legitimize both halves recursively, then re-fold any
   constant term back into the result.  */
4848 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
4849 new = legitimize_pic_address (XEXP (addr
, 1),
4850 base
== reg
? NULL_RTX
: reg
);
4852 if (GET_CODE (new) == CONST_INT
)
4853 new = plus_constant (base
, INTVAL (new));
4856 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
4858 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
4859 new = XEXP (new, 1);
4861 new = gen_rtx_PLUS (Pmode
, base
, new);
4869 /* Try machine-dependent ways of modifying an illegitimate address
4870 to be legitimate. If we find one, return the new, valid address.
4871 This macro is used in only one place: `memory_address' in explow.c.
4873 OLDX is the address as it was before break_out_memory_refs was called.
4874 In some cases it is useful to look at this to decide what needs to be done.
4876 MODE and WIN are passed so that this macro can use
4877 GO_IF_LEGITIMATE_ADDRESS.
4879 It is always safe for this macro to do nothing. It exists to recognize
4880 opportunities to optimize the output.
4882 For the 80386, we handle X+REG by loading X into a register R and
4883 using R+REG. R will go in a general reg and indexing will be used.
4884 However, if REG is a broken-out memory address or multiplication,
4885 nothing needs to be done because REG can certainly go in a general reg.
4887 When -fpic is used, special handling is needed for symbolic references.
4888 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): lossy extraction -- original lines are split across rows
   and many lines (braces, `changed = 1` updates, returns) are missing.
   Comments only; code text untouched.  */
/* legitimize_address (x, oldx, mode): machine-dependent rewriting of an
   illegitimate address X into a legitimate one (used by memory_address).
   Canonicalizes small shifts into multiplies, reassociates PLUS trees
   toward the (plus (plus (mult reg const) reg) const) shape the i386
   addressing modes want, and defers symbolic PIC references to
   legitimize_pic_address.  OLDX is unused here.  */
4891 legitimize_address (x
, oldx
, mode
)
4893 register rtx oldx ATTRIBUTE_UNUSED
;
4894 enum machine_mode mode
;
/* Optional debug trace of every legitimization attempt.  */
4899 if (TARGET_DEBUG_ADDR
)
4901 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
4902 GET_MODE_NAME (mode
));
4906 if (flag_pic
&& SYMBOLIC_CONST (x
))
4907 return legitimize_pic_address (x
, 0);
4909 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
4910 if (GET_CODE (x
) == ASHIFT
4911 && GET_CODE (XEXP (x
, 1)) == CONST_INT
4912 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
4915 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
4916 GEN_INT (1 << log
));
4919 if (GET_CODE (x
) == PLUS
)
4921 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
4923 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
4924 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4925 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
4928 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
4929 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
4930 GEN_INT (1 << log
));
4933 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
4934 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
4935 && (log
= (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
4938 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
4939 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
4940 GEN_INT (1 << log
));
4943 /* Put multiply first if it isn't already. */
4944 if (GET_CODE (XEXP (x
, 1)) == MULT
)
4946 rtx tmp
= XEXP (x
, 0);
/* NOTE(review): only half of the swap is visible here -- the line
   assigning XEXP (x, 1) = tmp is missing from this extraction.  */
4947 XEXP (x
, 0) = XEXP (x
, 1);
4952 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
4953 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
4954 created by virtual register instantiation, register elimination, and
4955 similar optimizations. */
4956 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
4959 x
= gen_rtx_PLUS (Pmode
,
4960 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
4961 XEXP (XEXP (x
, 1), 0)),
4962 XEXP (XEXP (x
, 1), 1));
4966 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
4967 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
4968 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
4969 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
4970 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
4971 && CONSTANT_P (XEXP (x
, 1)))
/* Pick out which of the two constants folds into `constant' and which
   leftover term becomes `other'.  */
4974 rtx other
= NULL_RTX
;
4976 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
4978 constant
= XEXP (x
, 1);
4979 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
4981 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
4983 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
4984 other
= XEXP (x
, 1);
4992 x
= gen_rtx_PLUS (Pmode
,
4993 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
4994 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
4995 plus_constant (other
, INTVAL (constant
)));
/* If the rewrites above already produced a legitimate address, stop;
   otherwise force MULT halves into registers.  */
4999 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5002 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5005 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5008 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5011 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5015 && GET_CODE (XEXP (x
, 1)) == REG
5016 && GET_CODE (XEXP (x
, 0)) == REG
)
5019 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5022 x
= legitimize_pic_address (x
, 0);
5025 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
/* Last resort: move one side of the PLUS into a fresh pseudo.  */
5028 if (GET_CODE (XEXP (x
, 0)) == REG
)
5030 register rtx temp
= gen_reg_rtx (Pmode
);
5031 register rtx val
= force_operand (XEXP (x
, 1), temp
);
5033 emit_move_insn (temp
, val
);
5039 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5041 register rtx temp
= gen_reg_rtx (Pmode
);
5042 register rtx val
= force_operand (XEXP (x
, 0), temp
);
5044 emit_move_insn (temp
, val
);
5054 /* Print an integer constant expression in assembler syntax. Addition
5055 and subtraction are the only arithmetic that may appear in these
5056 expressions. FILE is the stdio stream to write to, X is the rtx, and
5057 CODE is the operand print code from the output string. */
/* NOTE(review): lossy extraction -- case labels, braces and breaks are
   missing from this view.  Comments only; code text untouched.  */
/* output_pic_addr_const (file, x, code): print constant expression X in
   assembler syntax, including the PIC relocation suffixes (@GOT,
   @GOTOFF, @PLT, @GOTPCREL) encoded as UNSPEC numbers.  Only addition
   and subtraction may appear.  CODE is the operand print code.  */
5060 output_pic_addr_const (file
, x
, code
)
5067 switch (GET_CODE (x
))
/* SYMBOL_REF: name, plus @PLT for 'P'-coded non-static symbols.  */
5077 assemble_name (file
, XSTR (x
, 0));
5078 if (code
== 'P' && ! SYMBOL_REF_FLAG (x
))
5079 fputs ("@PLT", file
);
/* CODE_LABEL / LABEL_REF: emit the internal label name.  */
5086 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5087 assemble_name (asm_out_file
, buf
);
5091 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5095 /* This used to output parentheses around the expression,
5096 but that does not work on the 386 (either ATT or BSD assembler). */
5097 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5101 if (GET_MODE (x
) == VOIDmode
)
5103 /* We can use %d if the number is <32 bits and positive. */
5104 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5105 fprintf (file
, "0x%lx%08lx",
5106 (unsigned long) CONST_DOUBLE_HIGH (x
),
5107 (unsigned long) CONST_DOUBLE_LOW (x
));
5109 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5112 /* We can't handle floating point constants;
5113 PRINT_OPERAND must handle them. */
5114 output_operand_lossage ("floating constant misused");
5118 /* Some assemblers need integer constants to appear first. */
5119 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5121 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5123 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5125 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5127 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5129 output_pic_addr_const (file
, XEXP (x
, 0), code
);
/* MINUS: bracket style depends on assembler dialect (AT&T vs Intel).  */
5136 putc (ASSEMBLER_DIALECT
? '(' : '[', file
);
5137 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5139 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5140 putc (ASSEMBLER_DIALECT
? ')' : ']', file
);
/* UNSPEC: one operand, followed by the relocation suffix selected by
   the unspec number (6=@GOT, 7=@GOTOFF; see legitimize_pic_address).  */
5144 if (XVECLEN (x
, 0) != 1)
5146 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5147 switch (XINT (x
, 1))
5150 fputs ("@GOT", file
);
5153 fputs ("@GOTOFF", file
);
5156 fputs ("@PLT", file
);
5159 fputs ("@GOTPCREL(%RIP)", file
);
5162 output_operand_lossage ("invalid UNSPEC as operand");
5168 output_operand_lossage ("invalid expression as operand");
5172 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5173 We need to handle our special PIC relocations. */
/* NOTE(review): lossy extraction -- conditionals and braces around these
   statements are missing.  Comments only; code text untouched.  */
/* i386_dwarf_output_addr_const (file, x): emit a Dwarf address constant,
   choosing the pointer-width assembler directive (.quad for 64-bit) and
   routing PIC-special constants through output_pic_addr_const.  */
5176 i386_dwarf_output_addr_const (file
, x
)
5181 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: INT_ASM_OP
);
5185 fprintf (file
, "%s", INT_ASM_OP
);
5188 output_pic_addr_const (file
, x
, '\0');
5190 output_addr_const (file
, x
);
5194 /* In the name of slightly smaller debug output, and to cater to
5195 general assembler losage, recognize PIC+GOTOFF and turn it back
5196 into a direct symbol reference. */
/* NOTE(review): lossy extraction -- the initial unwrapping of ORIG_X
   into X and the fall-through returns are missing.  Comments only.  */
/* i386_simplify_dwarf_addr (orig_x): for debug output, strip the PIC
   wrapping (pic_reg + @GOT/@GOTOFF/@GOTPCREL unspecs) and recover the
   underlying symbol, so debug info refers to the symbol directly.  */
5199 i386_simplify_dwarf_addr (orig_x
)
/* 64-bit @GOTPCREL form: (const (unspec [sym] 15)).  */
5206 if (GET_CODE (x
) != CONST
5207 || GET_CODE (XEXP (x
, 0)) != UNSPEC
5208 || XINT (XEXP (x
, 0), 1) != 15)
5210 return XVECEXP (XEXP (x
, 0), 0, 0);
/* 32-bit form: (plus pic_reg (const ...)).  */
5213 if (GET_CODE (x
) != PLUS
5214 || GET_CODE (XEXP (x
, 0)) != REG
5215 || GET_CODE (XEXP (x
, 1)) != CONST
)
5218 x
= XEXP (XEXP (x
, 1), 0);
/* Bare @GOT (6) or @GOTOFF (7) unspec: return the symbol.  */
5219 if (GET_CODE (x
) == UNSPEC
5220 && (XINT (x
, 1) == 6
5221 || XINT (x
, 1) == 7))
5222 return XVECEXP (x
, 0, 0);
/* Unspec plus constant offset: return symbol + offset.  */
5224 if (GET_CODE (x
) == PLUS
5225 && GET_CODE (XEXP (x
, 0)) == UNSPEC
5226 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5227 && (XINT (XEXP (x
, 0), 1) == 6
5228 || XINT (XEXP (x
, 0), 1) == 7))
5229 return gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
/* NOTE(review): lossy extraction -- the switch statement, case labels
   and most suffix assignments are missing from this view.  Comments
   only; code text untouched.  */
/* put_condition_code (code, mode, reverse, fp, file): write the condition
   suffix (e.g. "e", "a", "np") for comparison CODE under CC mode MODE to
   FILE.  REVERSE inverts the condition; FP selects the fcmov-style
   spellings ("nbe"/"nb"/"u"/"nu") over the integer ones.  */
5235 put_condition_code (code
, mode
, reverse
, fp
, file
)
5237 enum machine_mode mode
;
/* FP compares: map the FP comparison to its integer equivalent first;
   multi-insn sequences (bypass/second code) are not handled here.  */
5243 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
5245 enum rtx_code second_code
, bypass_code
;
5246 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
5247 if (bypass_code
!= NIL
|| second_code
!= NIL
)
5249 code
= ix86_fp_compare_code_to_integer (code
);
5253 code
= reverse_condition (code
);
/* Mode sanity checks: several conditions are only valid under a subset
   of the CC modes.  */
5264 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
5269 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5270 Those same assemblers have the same but opposite losage on cmov. */
5273 suffix
= fp
? "nbe" : "a";
5276 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
5278 else if (mode
== CCmode
|| mode
== CCGCmode
)
5289 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
5291 else if (mode
== CCmode
|| mode
== CCGCmode
)
5300 suffix
= fp
? "nb" : "ae";
5303 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
5313 suffix
= fp
? "u" : "p";
5316 suffix
= fp
? "nu" : "np";
5321 fputs (suffix
, file
);
/* NOTE(review): lossy extraction -- the abort() calls, the '%' output,
   the size-code case labels, and several branches are missing.
   Comments only; code text untouched.  */
/* print_reg (x, code, file): print register X's assembler name to FILE.
   CODE selects the size variant ('b' byte, 'w' word, 'k' SImode,
   'q' DImode, 'h' high byte, 'y' "st(0)" spelling); otherwise the size
   is taken from X's machine mode.  */
5325 print_reg (x
, code
, file
)
/* Soft registers that must never reach assembly output.  */
5330 if (REGNO (x
) == ARG_POINTER_REGNUM
5331 || REGNO (x
) == FRAME_POINTER_REGNUM
5332 || REGNO (x
) == FLAGS_REG
5333 || REGNO (x
) == FPSR_REG
)
5336 if (ASSEMBLER_DIALECT
== 0 || USER_LABEL_PREFIX
[0] == 0)
/* Translate the print code into a byte size (the missing branches
   presumably assign 2/1/4/8/... -- TODO confirm).  */
5339 if (code
== 'w' || MMX_REG_P (x
))
5341 else if (code
== 'b')
5343 else if (code
== 'k')
5345 else if (code
== 'q')
5347 else if (code
== 'y')
5349 else if (code
== 'h')
5352 code
= GET_MODE_SIZE (GET_MODE (x
));
5354 /* Irritatingly, AMD extended registers use different naming convention
5355 from the normal registers. */
5356 if (REX_INT_REG_P (x
))
5363 error ("Extended registers have no high halves\n");
/* r8..r15 with b/w/d suffix by operand size.  */
5366 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5369 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5372 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5375 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5378 error ("Unsupported operand size for extended register.\n");
5386 if (STACK_TOP_P (x
))
5388 fputs ("st(0)", file
);
/* Integer registers: 'e'/'r' prefix for 32/64-bit names, then the
   name table indexed by size.  */
5395 if (! ANY_FP_REG_P (x
))
5396 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
5400 fputs (hi_reg_name
[REGNO (x
)], file
);
5403 fputs (qi_reg_name
[REGNO (x
)], file
);
5406 fputs (qi_high_reg_name
[REGNO (x
)], file
);
5414 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5415 C -- print opcode suffix for set/cmov insn.
5416 c -- like C, but print reversed condition
5417 F,f -- likewise, but for floating-point.
5418 R -- print the prefix for register names.
5419 z -- print the opcode suffix for the size of the current operand.
5420 * -- print a star (in certain assembler syntax)
5421 A -- print an absolute memory reference.
5422 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5423 s -- print a shift double count, followed by the assemblers argument
5425 b -- print the QImode name of the register for the indicated operand.
5426 %b0 would print %al if operands[0] is reg 0.
5427 w -- likewise, print the HImode name of the register.
5428 k -- likewise, print the SImode name of the register.
5429 q -- likewise, print the DImode name of the register.
5430 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5431 y -- print "st(0)" instead of "st" as a register.
5432 D -- print condition for SSE cmp instruction.
5433 P -- if PIC, print an @PLT suffix.
5434 X -- don't print any sort of PIC '@' suffix for a symbol.
/* NOTE(review): lossy extraction -- case labels, braces, many fputs/putc
   calls and breaks are missing from this view.  Comments only; code
   text untouched.  The operand-code meanings are documented in the
   comment block just above this function.  */
/* print_operand (file, x, code): main operand printer, dispatching on
   the print CODE (see table above) and then on the rtx class of X.  */
5438 print_operand (file
, x
, code
)
/* Dialect-dependent punctuation codes ('*', 'A', 'L', 'W', ...).  */
5448 if (ASSEMBLER_DIALECT
== 0)
5453 if (ASSEMBLER_DIALECT
== 0)
5455 else if (ASSEMBLER_DIALECT
== 1)
5457 /* Intel syntax. For absolute addresses, registers should not
5458 be surrounded by braces. */
5459 if (GET_CODE (x
) != REG
)
5462 PRINT_OPERAND (file
, x
, 0);
5468 PRINT_OPERAND (file
, x
, 0);
5473 if (ASSEMBLER_DIALECT
== 0)
5478 if (ASSEMBLER_DIALECT
== 0)
5483 if (ASSEMBLER_DIALECT
== 0)
5488 if (ASSEMBLER_DIALECT
== 0)
5493 if (ASSEMBLER_DIALECT
== 0)
5498 if (ASSEMBLER_DIALECT
== 0)
/* 'z': size suffix derived from the operand's mode.  */
5503 /* 387 opcodes don't get size suffixes if the operands are
5506 if (STACK_REG_P (x
))
5509 /* this is the size of op from size of operand */
5510 switch (GET_MODE_SIZE (GET_MODE (x
)))
5513 #ifdef HAVE_GAS_FILDS_FISTS
5519 if (GET_MODE (x
) == SFmode
)
5534 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5536 #ifdef GAS_MNEMONICS
/* 's': shift-double count.  */
5562 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
5564 PRINT_OPERAND (file
, x
, 0);
/* 'D': SSE compare predicate names.  */
5570 /* Little bit of braindamage here. The SSE compare instructions
5571 does use completely different names for the comparisons that the
5572 fp conditional moves. */
5573 switch (GET_CODE (x
))
5588 fputs ("unord", file
);
5592 fputs ("neq", file
);
5596 fputs ("nlt", file
);
5600 fputs ("nle", file
);
5603 fputs ("ord", file
);
/* 'C'/'F' and reversed 'c'/'f': condition suffixes.  */
5611 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
5614 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
5617 /* Like above, but reverse condition */
5619 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
5622 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
/* Branch-prediction hint prefixes, from the insn's REG_BR_PROB note.  */
5628 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
5631 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
5634 int pred_val
= INTVAL (XEXP (x
, 0));
5636 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
5637 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
5639 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
5640 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
5642 /* Emit hints only in the case default branch prediction
5643 heruistics would fail. */
5644 if (taken
!= cputaken
)
5646 /* We use 3e (DS) prefix for taken branches and
5647 2e (CS) prefix for not taken branches. */
5649 fputs ("ds ; ", file
);
5651 fputs ("cs ; ", file
);
5660 sprintf (str
, "invalid operand code `%c'", code
);
5661 output_operand_lossage (str
);
/* After code handling: print X itself by rtx class.  */
5666 if (GET_CODE (x
) == REG
)
5668 PRINT_REG (x
, code
, file
);
5671 else if (GET_CODE (x
) == MEM
)
5673 /* No `byte ptr' prefix for call instructions. */
5674 if (ASSEMBLER_DIALECT
!= 0 && code
!= 'X' && code
!= 'P')
5677 switch (GET_MODE_SIZE (GET_MODE (x
)))
5679 case 1: size
= "BYTE"; break;
5680 case 2: size
= "WORD"; break;
5681 case 4: size
= "DWORD"; break;
5682 case 8: size
= "QWORD"; break;
5683 case 12: size
= "XWORD"; break;
5684 case 16: size
= "XMMWORD"; break;
5689 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5692 else if (code
== 'w')
5694 else if (code
== 'k')
5698 fputs (" PTR ", file
);
5702 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
5703 output_pic_addr_const (file
, x
, code
);
5704 /* Avoid (%rip) for call operands. */
5705 else if (CONSTANT_ADDRESS_P (x
) && code
=='P'
5706 && GET_CODE (x
) != CONST_INT
)
5707 output_addr_const (file
, x
);
/* SFmode constant: printed as the raw IEEE bit pattern.  */
5712 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
5717 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5718 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
5720 if (ASSEMBLER_DIALECT
== 0)
5722 fprintf (file
, "0x%lx", l
);
5725 /* These float cases don't actually occur as immediate operands. */
5726 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
5731 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5732 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
5733 fprintf (file
, "%s", dstr
);
5736 else if (GET_CODE (x
) == CONST_DOUBLE
5737 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
5742 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5743 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
5744 fprintf (file
, "%s", dstr
);
/* Immediates: '$' in AT&T syntax, "OFFSET FLAT:" for symbols in Intel
   syntax; PIC constants go through output_pic_addr_const.  */
5750 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
5752 if (ASSEMBLER_DIALECT
== 0)
5755 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
5756 || GET_CODE (x
) == LABEL_REF
)
5758 if (ASSEMBLER_DIALECT
== 0)
5761 fputs ("OFFSET FLAT:", file
);
5764 if (GET_CODE (x
) == CONST_INT
)
5765 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5767 output_pic_addr_const (file
, x
, code
);
5769 output_addr_const (file
, x
);
5773 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): lossy extraction -- braces, the base/disp assignments
   from `parts', abort() calls and dialect punctuation are missing.
   Comments only; code text untouched.  */
/* print_operand_address (file, addr): print memory address ADDR after
   decomposing it into base + index*scale + displacement.  Handles both
   AT&T ("disp(base,index,scale)") and Intel ("[base+index*scale+disp]")
   dialects, plus %rip-relative forms on 64-bit.  */
5776 print_operand_address (file
, addr
)
5780 struct ix86_address parts
;
5781 rtx base
, index
, disp
;
5784 if (! ix86_decompose_address (addr
, &parts
))
5788 index
= parts
.index
;
5790 scale
= parts
.scale
;
/* No base and no index: displacement-only address.  */
5792 if (!base
&& !index
)
5794 /* Displacement only requires special attention. */
5796 if (GET_CODE (disp
) == CONST_INT
)
5798 if (ASSEMBLER_DIALECT
!= 0)
5800 if (USER_LABEL_PREFIX
[0] == 0)
5802 fputs ("ds:", file
);
5804 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
5807 output_pic_addr_const (file
, addr
, 0);
5809 output_addr_const (file
, addr
);
5811 /* Use one byte shorter RIP relative addressing for 64bit mode. */
5812 if (GET_CODE (disp
) != CONST_INT
&& TARGET_64BIT
)
5813 fputs ("(%rip)", file
);
/* AT&T dialect: disp first, then (base,index,scale).  */
5817 if (ASSEMBLER_DIALECT
== 0)
5822 output_pic_addr_const (file
, disp
, 0);
5823 else if (GET_CODE (disp
) == LABEL_REF
)
5824 output_asm_label (disp
);
5826 output_addr_const (file
, disp
);
5831 PRINT_REG (base
, 0, file
);
5835 PRINT_REG (index
, 0, file
);
5837 fprintf (file
, ",%d", scale
);
/* Intel dialect: split a symbol+offset CONST so the offset can be
   printed with an explicit sign inside the brackets.  */
5843 rtx offset
= NULL_RTX
;
5847 /* Pull out the offset of a symbol; print any symbol itself. */
5848 if (GET_CODE (disp
) == CONST
5849 && GET_CODE (XEXP (disp
, 0)) == PLUS
5850 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
5852 offset
= XEXP (XEXP (disp
, 0), 1);
5853 disp
= gen_rtx_CONST (VOIDmode
,
5854 XEXP (XEXP (disp
, 0), 0));
5858 output_pic_addr_const (file
, disp
, 0);
5859 else if (GET_CODE (disp
) == LABEL_REF
)
5860 output_asm_label (disp
);
5861 else if (GET_CODE (disp
) == CONST_INT
)
5864 output_addr_const (file
, disp
);
5870 PRINT_REG (base
, 0, file
);
5873 if (INTVAL (offset
) >= 0)
5875 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
5879 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
5886 PRINT_REG (index
, 0, file
);
5888 fprintf (file
, "*%d", scale
);
5895 /* Split one or more DImode RTL references into pairs of SImode
5896 references. The RTL can be REG, offsettable MEM, integer constant, or
5897 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5898 split and "num" is its length. lo_half and hi_half are output arrays
5899 that parallel "operands". */
/* NOTE(review): lossy extraction -- the enclosing loop over NUM, the
   parameter declarations for `operands'/`num', and the final abort
   branch are missing.  Comments only; code text untouched.  */
/* split_di (operands, num, lo_half, hi_half): split each DImode rtx in
   OPERANDS (NUM entries) into SImode low/high halves written to
   LO_HALF/HI_HALF.  Handles constants, pseudos before reload, hard
   register pairs, and offsettable memory.  */
5902 split_di (operands
, num
, lo_half
, hi_half
)
5905 rtx lo_half
[], hi_half
[];
5909 rtx op
= operands
[num
];
5910 if (CONSTANT_P (op
))
5911 split_double (op
, &lo_half
[num
], &hi_half
[num
]);
/* Before reload, subregs/lowparts are still legal on pseudos.  */
5912 else if (! reload_completed
)
5914 lo_half
[num
] = gen_lowpart (SImode
, op
);
5915 hi_half
[num
] = gen_highpart (SImode
, op
);
/* After reload, a DImode hard register occupies two consecutive
   SImode registers.  */
5917 else if (GET_CODE (op
) == REG
)
5921 lo_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
));
5922 hi_half
[num
] = gen_rtx_REG (SImode
, REGNO (op
) + 1);
5924 else if (offsettable_memref_p (op
))
5926 lo_half
[num
] = adjust_address (op
, SImode
, 0);
5927 hi_half
[num
] = adjust_address (op
, SImode
, 4);
5934 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
5935 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
5936 is the expression of the binary operation. The output may either be
5937 emitted here, or returned to the caller, like all output_* functions.
5939 There is no guarantee that the operands are the same mode, as they
5940 might be within FLOAT or FLOAT_EXTEND expressions. */
5942 #ifndef SYSV386_COMPAT
5943 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
5944 wants to fix the assemblers because that causes incompatibility
5945 with gcc. No-one wants to fix gcc because that causes
5946 incompatibility with assemblers... You can use the option of
5947 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
5948 #define SYSV386_COMPAT 1
/* NOTE(review): lossy extraction -- case labels, the strcpy of the
   opcode stem into `buf', aborts and the final strcat/return are
   missing.  Comments only; code text untouched.  */
/* output_387_binary_op (insn, operands): return the assembler template
   for a 387 (or SSE scalar) binary fp operation.  operands[3] holds the
   PLUS/MINUS/MULT/DIV expression; the result template is built in the
   static `buf' and the operand-order suffix chosen in `p'.  */
5952 output_387_binary_op (insn
, operands
)
5956 static char buf
[30];
5959 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
5961 #ifdef ENABLE_CHECKING
5962 /* Even if we do not want to check the inputs, this documents input
5963 constraints. Which helps in understanding the following code. */
5964 if (STACK_REG_P (operands
[0])
5965 && ((REG_P (operands
[1])
5966 && REGNO (operands
[0]) == REGNO (operands
[1])
5967 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
5968 || (REG_P (operands
[2])
5969 && REGNO (operands
[0]) == REGNO (operands
[2])
5970 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
5971 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
/* Select the opcode stem by operation; integer-mode operands use the
   fi* (integer-operand) forms.  */
5977 switch (GET_CODE (operands
[3]))
5980 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
5981 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
5989 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
5990 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
5998 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
5999 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6007 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6008 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
/* SSE scalar path: ss/sd suffix by mode, fixed operand order.  */
6022 if (GET_MODE (operands
[0]) == SFmode
)
6023 strcat (buf
, "ss\t{%2, %0|%0, %2}");
6025 strcat (buf
, "sd\t{%2, %0|%0, %2}");
/* 387 path: pick the operand/pop suffix based on which operand is
   st(0), which dies, and memory placement.  */
6030 switch (GET_CODE (operands
[3]))
/* Commutative ops (add/mul): canonicalize so operands[2] is the one
   combined into operands[0].  */
6034 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
6036 rtx temp
= operands
[2];
6037 operands
[2] = operands
[1];
6041 /* know operands[0] == operands[1]. */
6043 if (GET_CODE (operands
[2]) == MEM
)
6049 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6051 if (STACK_TOP_P (operands
[0]))
6052 /* How is it that we are storing to a dead operand[2]?
6053 Well, presumably operands[1] is dead too. We can't
6054 store the result to st(0) as st(0) gets popped on this
6055 instruction. Instead store to operands[2] (which I
6056 think has to be st(1)). st(1) will be popped later.
6057 gcc <= 2.8.1 didn't have this check and generated
6058 assembly code that the Unixware assembler rejected. */
6059 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6061 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6065 if (STACK_TOP_P (operands
[0]))
6066 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6068 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (sub/div): direction matters, and SYSV386_COMPAT
   selects between hardware and AT&T-assembler operand conventions.  */
6073 if (GET_CODE (operands
[1]) == MEM
)
6079 if (GET_CODE (operands
[2]) == MEM
)
6085 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6088 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6089 derived assemblers, confusingly reverse the direction of
6090 the operation for fsub{r} and fdiv{r} when the
6091 destination register is not st(0). The Intel assembler
6092 doesn't have this brain damage. Read !SYSV386_COMPAT to
6093 figure out what the hardware really does. */
6094 if (STACK_TOP_P (operands
[0]))
6095 p
= "{p\t%0, %2|rp\t%2, %0}";
6097 p
= "{rp\t%2, %0|p\t%0, %2}";
6099 if (STACK_TOP_P (operands
[0]))
6100 /* As above for fmul/fadd, we can't store to st(0). */
6101 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6103 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6108 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
6111 if (STACK_TOP_P (operands
[0]))
6112 p
= "{rp\t%0, %1|p\t%1, %0}";
6114 p
= "{p\t%1, %0|rp\t%0, %1}";
6116 if (STACK_TOP_P (operands
[0]))
6117 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6119 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6124 if (STACK_TOP_P (operands
[0]))
6126 if (STACK_TOP_P (operands
[1]))
6127 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6129 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6132 else if (STACK_TOP_P (operands
[1]))
6135 p
= "{\t%1, %0|r\t%0, %1}";
6137 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6143 p
= "{r\t%2, %0|\t%0, %2}";
6145 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6158 /* Output code to initialize control word copies used by
6159 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
6160 is set to control word rounding downwards. */
/* NOTE(review): lossy extraction -- the `else' between the insv and
   iorhi3 variants and part of the TARGET_PARTIAL_REG_STALL condition
   are missing.  Comments only; code text untouched.  */
/* emit_i387_cw_initialization (normal, round_down): store the current
   387 control word into NORMAL, and into ROUND_DOWN a copy with the
   rounding-control bits (0xc00, i.e. round toward zero/truncate) set.
   The insv variant avoids a partial-register stall when profitable.  */
6162 emit_i387_cw_initialization (normal
, round_down
)
6163 rtx normal
, round_down
;
6165 rtx reg
= gen_reg_rtx (HImode
);
6167 emit_insn (gen_x86_fnstcw_1 (normal
));
6168 emit_move_insn (reg
, normal
);
6169 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
/* Set RC bits via bit-insert of 0xc into bits 10-11.  */
6171 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
/* Fallback: OR in 0xc00 directly.  */
6173 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
6174 emit_move_insn (round_down
, reg
);
6177 /* Output code for INSN to convert a float to a signed int. OPERANDS
6178 are the insn operands. The output may be [HSD]Imode and the input
6179 operand may be [SDX]Fmode. */
/* NOTE(review): lossy extraction -- the abort() calls after the two
   sanity checks and the enclosing braces are missing.  Comments only;
   code text untouched.  */
/* output_fix_trunc (insn, operands): emit assembly converting an fp
   value to a signed integer.  operands[2]/[3] hold the saved and
   round-down control words (see emit_i387_cw_initialization); fistp is
   used when st(0) dies or for DImode (no non-popping fistpll exists).  */
6182 output_fix_trunc (insn
, operands
)
6186 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
6187 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
6189 /* Jump through a hoop or two for DImode, since the hardware has no
6190 non-popping instruction. We used to do this a different way, but
6191 that was somewhat fragile and broke with post-reload splitters. */
6192 if (dimode_p
&& !stack_top_dies
)
6193 output_asm_insn ("fld\t%y1", operands
);
/* Sanity checks: input must be st(0); DImode output must be memory.  */
6195 if (!STACK_TOP_P (operands
[1]))
6198 if (GET_CODE (operands
[0]) != MEM
)
/* Switch to truncating rounding, convert, restore control word.  */
6201 output_asm_insn ("fldcw\t%3", operands
);
6202 if (stack_top_dies
|| dimode_p
)
6203 output_asm_insn ("fistp%z0\t%0", operands
);
6205 output_asm_insn ("fist%z0\t%0", operands
);
6206 output_asm_insn ("fldcw\t%2", operands
);
6211 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6212 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6213 when fucom should be used. */
/* NOTE(review): lossy extraction -- aborts, braces, several alt[] table
   entries and the final `return alt[mask];` are missing.  Comments
   only; code text untouched.  */
/* output_fp_compare (insn, operands, eflags_p, unordered_p): return the
   assembler template for an fp compare.  EFLAGS_P is 1 for fcomi-style
   (sets eflags directly) and 2 for the fnstsw variants; UNORDERED_P
   selects fucom over fcom.  SSE registers use [u]comiss/[u]comisd.  */
6216 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
6219 int eflags_p
, unordered_p
;
6222 rtx cmp_op0
= operands
[0];
6223 rtx cmp_op1
= operands
[1];
6224 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
6229 cmp_op1
= operands
[2];
6233 if (GET_MODE (operands
[0]) == SFmode
)
6235 return "ucomiss\t{%1, %0|%0, %1}";
/* NOTE(review): the Intel-syntax half reads "%0, %y" here (and in the
   comisd case below) where the AT&T half uses %1 -- looks like a typo
   for "%0, %1"; verify against a pristine copy before relying on the
   Intel-dialect output.  */
6237 return "comiss\t{%1, %0|%0, %y}";
6240 return "ucomisd\t{%1, %0|%0, %1}";
6242 return "comisd\t{%1, %0|%0, %y}";
6245 if (! STACK_TOP_P (cmp_op0
))
6248 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
6250 if (STACK_REG_P (cmp_op1
)
6252 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
6253 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
6255 /* If both the top of the 387 stack dies, and the other operand
6256 is also a stack register that dies, then this must be a
6257 `fcompp' float compare */
6261 /* There is no double popping fcomi variant. Fortunately,
6262 eflags is immune from the fstp's cc clobbering. */
6264 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
6266 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
6274 return "fucompp\n\tfnstsw\t%0";
6276 return "fcompp\n\tfnstsw\t%0";
6289 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6291 static const char * const alt
[24] =
6303 "fcomi\t{%y1, %0|%0, %y1}",
6304 "fcomip\t{%y1, %0|%0, %y1}",
6305 "fucomi\t{%y1, %0|%0, %y1}",
6306 "fucomip\t{%y1, %0|%0, %y1}",
6313 "fcom%z2\t%y2\n\tfnstsw\t%0",
6314 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6315 "fucom%z2\t%y2\n\tfnstsw\t%0",
6316 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6318 "ficom%z2\t%y2\n\tfnstsw\t%0",
6319 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into alt[] from the flags above.  */
6327 mask
= eflags_p
<< 3;
6328 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
6329 mask
|= unordered_p
<< 1;
6330 mask
|= stack_top_dies
;
6342 /* Output assembler code to FILE to initialize basic-block profiling.
6344 If profile_block_flag == 2
6346 Output code to call the subroutine `__bb_init_trace_func'
6347 and pass two parameters to it. The first parameter is
6348 the address of a block allocated in the object module.
6349 The second parameter is the number of the first basic block
6352 The name of the block is a local symbol made with this statement:
6354 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6356 Of course, since you are writing the definition of
6357 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6358 can take a short cut in the definition of this macro and use the
6359 name that you know will result.
6361 The number of the first basic block of the function is
6362 passed to the macro in BLOCK_OR_LABEL.
6364 If described in a virtual assembler language the code to be
6368 parameter2 <- BLOCK_OR_LABEL
6369 call __bb_init_trace_func
6371 else if profile_block_flag != 0
6373 Output code to call the subroutine `__bb_init_func'
6374 and pass one single parameter to it, which is the same
6375 as the first parameter to `__bb_init_trace_func'.
6377 The first word of this parameter is a flag which will be nonzero if
6378 the object module has already been initialized. So test this word
6379 first, and do not call `__bb_init_func' if the flag is nonzero.
6380 Note: When profile_block_flag == 2 the test need not be done
6381 but `__bb_init_trace_func' *must* be called.
6383 BLOCK_OR_LABEL may be used to generate a label number as a
6384 branch destination in case `__bb_init_func' will not be called.
6386 If described in a virtual assembler language the code to be
/* NOTE(review): lossy extraction -- the case labels for
   profile_block_flag, braces, TARGET_64BIT guards around push vs lea
   sequences, and the num_func increment are missing.  Comments only;
   code text untouched.  */
/* ix86_output_function_block_profiler (file, block_or_label): emit the
   function-entry basic-block profiling sequence (see the long comment
   above).  profile_block_flag == 2 calls __bb_init_trace_func with the
   block table address and first block number; otherwise __bb_init_func
   is called once, guarded by the table's already-initialized flag.  */
6397 ix86_output_function_block_profiler (file
, block_or_label
)
6401 static int num_func
= 0;
6403 char block_table
[80], false_label
[80];
6405 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
6407 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
6408 xops
[5] = stack_pointer_rtx
;
6409 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
6411 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
6413 switch (profile_block_flag
)
/* profile_block_flag == 2: push args and call __bb_init_trace_func.  */
6416 xops
[2] = GEN_INT (block_or_label
);
6417 xops
[3] = gen_rtx_MEM (Pmode
,
6418 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_trace_func"));
6419 xops
[6] = GEN_INT (8);
6421 output_asm_insn ("push{l}\t%2", xops
);
6423 output_asm_insn ("push{l}\t%1", xops
);
/* PIC path: materialize the table address in %eax first.  */
6426 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
6427 output_asm_insn ("push{l}\t%7", xops
);
6429 output_asm_insn ("call\t%P3", xops
);
6430 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
/* Other nonzero profile_block_flag: test the initialized flag, then
   call __bb_init_func once.  */
6434 ASM_GENERATE_INTERNAL_LABEL (false_label
, "LPBZ", num_func
);
6436 xops
[0] = const0_rtx
;
6437 xops
[2] = gen_rtx_MEM (Pmode
,
6438 gen_rtx_SYMBOL_REF (VOIDmode
, false_label
));
6439 xops
[3] = gen_rtx_MEM (Pmode
,
6440 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_init_func"));
6441 xops
[4] = gen_rtx_MEM (Pmode
, xops
[1]);
6442 xops
[6] = GEN_INT (4);
6444 CONSTANT_POOL_ADDRESS_P (xops
[2]) = TRUE
;
6446 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops
);
6447 output_asm_insn ("jne\t%2", xops
);
6450 output_asm_insn ("push{l}\t%1", xops
);
/* NOTE(review): Intel side of this lea is "%7, %a2" while the AT&T side
   and the earlier lea both use %a1 -- looks inconsistent; verify
   against a pristine i386.c before trusting Intel-dialect output.  */
6453 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops
);
6454 output_asm_insn ("push{l}\t%7", xops
);
6456 output_asm_insn ("call\t%P3", xops
);
6457 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops
);
6458 ASM_OUTPUT_INTERNAL_LABEL (file
, "LPBZ", num_func
);
6464 /* Output assembler code to FILE to increment a counter associated
6465 with basic block number BLOCKNO.
6467 If profile_block_flag == 2
6469 Output code to initialize the global structure `__bb' and
6470 call the function `__bb_trace_func' which will increment the
6473 `__bb' consists of two words. In the first word the number
6474 of the basic block has to be stored. In the second word
6475 the address of a block allocated in the object module
6478 The basic block number is given by BLOCKNO.
6480 The address of the block is given by the label created with
6482 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
6484 by FUNCTION_BLOCK_PROFILER.
6486 Of course, since you are writing the definition of
6487 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6488 can take a short cut in the definition of this macro and use the
6489 name that you know will result.
6491 If described in a virtual assembler language the code to be
6494 move BLOCKNO -> (__bb)
6495 move LPBX0 -> (__bb+4)
6496 call __bb_trace_func
6498 Note that function `__bb_trace_func' must not change the
6499 machine state, especially the flag register. To grant
6500 this, you must output code to save and restore registers
6501 either in this macro or in the macros MACHINE_STATE_SAVE
6502 and MACHINE_STATE_RESTORE. The last two macros will be
6503 used in the function `__bb_trace_func', so you must make
6504 sure that the function prologue does not change any
6505 register prior to saving it with MACHINE_STATE_SAVE.
6507 else if profile_block_flag != 0
6509 Output code to increment the counter directly.
6510 Basic blocks are numbered separately from zero within each
6511 compiled object module. The count associated with block number
6512 BLOCKNO is at index BLOCKNO in an array of words; the name of
6513 this array is a local symbol made with this statement:
6515 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
6517 Of course, since you are writing the definition of
6518 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
6519 can take a short cut in the definition of this macro and use the
6520 name that you know will result.
6522 If described in a virtual assembler language the code to be
6525 inc (LPBX2+4*BLOCKNO)
6529 ix86_output_block_profiler (file
, blockno
)
6530 FILE *file ATTRIBUTE_UNUSED
;
6533 rtx xops
[8], cnt_rtx
;
6535 char *block_table
= counts
;
6537 switch (profile_block_flag
)
6540 ASM_GENERATE_INTERNAL_LABEL (block_table
, "LPBX", 0);
6542 xops
[1] = gen_rtx_SYMBOL_REF (VOIDmode
, block_table
);
6543 xops
[2] = GEN_INT (blockno
);
6544 xops
[3] = gen_rtx_MEM (Pmode
,
6545 gen_rtx_SYMBOL_REF (VOIDmode
, "__bb_trace_func"));
6546 xops
[4] = gen_rtx_SYMBOL_REF (VOIDmode
, "__bb");
6547 xops
[5] = plus_constant (xops
[4], 4);
6548 xops
[0] = gen_rtx_MEM (SImode
, xops
[4]);
6549 xops
[6] = gen_rtx_MEM (SImode
, xops
[5]);
6551 CONSTANT_POOL_ADDRESS_P (xops
[1]) = TRUE
;
6553 output_asm_insn ("pushf", xops
);
6554 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
6557 xops
[7] = gen_rtx_REG (Pmode
, 0); /* eax */
6558 output_asm_insn ("push{l}\t%7", xops
);
6559 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops
);
6560 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops
);
6561 output_asm_insn ("pop{l}\t%7", xops
);
6564 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops
);
6565 output_asm_insn ("call\t%P3", xops
);
6566 output_asm_insn ("popf", xops
);
6571 ASM_GENERATE_INTERNAL_LABEL (counts
, "LPBX", 2);
6572 cnt_rtx
= gen_rtx_SYMBOL_REF (VOIDmode
, counts
);
6573 SYMBOL_REF_FLAG (cnt_rtx
) = TRUE
;
6576 cnt_rtx
= plus_constant (cnt_rtx
, blockno
*4);
6579 cnt_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, cnt_rtx
);
6581 xops
[0] = gen_rtx_MEM (SImode
, cnt_rtx
);
6582 output_asm_insn ("inc{l}\t%0", xops
);
6589 ix86_expand_move (mode
, operands
)
6590 enum machine_mode mode
;
6593 int strict
= (reload_in_progress
|| reload_completed
);
6596 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (operands
[1], Pmode
))
6598 /* Emit insns to move operands[1] into operands[0]. */
6600 if (GET_CODE (operands
[0]) == MEM
)
6601 operands
[1] = force_reg (Pmode
, operands
[1]);
6604 rtx temp
= operands
[0];
6605 if (GET_CODE (temp
) != REG
)
6606 temp
= gen_reg_rtx (Pmode
);
6607 temp
= legitimize_pic_address (operands
[1], temp
);
6608 if (temp
== operands
[0])
6615 if (GET_CODE (operands
[0]) == MEM
6616 && (GET_MODE (operands
[0]) == QImode
6617 || !push_operand (operands
[0], mode
))
6618 && GET_CODE (operands
[1]) == MEM
)
6619 operands
[1] = force_reg (mode
, operands
[1]);
6621 if (push_operand (operands
[0], mode
)
6622 && ! general_no_elim_operand (operands
[1], mode
))
6623 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
6625 if (FLOAT_MODE_P (mode
))
6627 /* If we are loading a floating point constant to a register,
6628 force the value to memory now, since we'll get better code
6629 out the back end. */
6633 else if (GET_CODE (operands
[1]) == CONST_DOUBLE
6634 && register_operand (operands
[0], mode
))
6635 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
6639 insn
= gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]);
6644 /* Attempt to expand a binary operator. Make the expansion closer to the
6645 actual machine, then just general_operand, which will allow 3 separate
6646 memory references (one output, two input) in a single insn. */
6649 ix86_expand_binary_operator (code
, mode
, operands
)
6651 enum machine_mode mode
;
6654 int matching_memory
;
6655 rtx src1
, src2
, dst
, op
, clob
;
6661 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6662 if (GET_RTX_CLASS (code
) == 'c'
6663 && (rtx_equal_p (dst
, src2
)
6664 || immediate_operand (src1
, mode
)))
6671 /* If the destination is memory, and we do not have matching source
6672 operands, do things in registers. */
6673 matching_memory
= 0;
6674 if (GET_CODE (dst
) == MEM
)
6676 if (rtx_equal_p (dst
, src1
))
6677 matching_memory
= 1;
6678 else if (GET_RTX_CLASS (code
) == 'c'
6679 && rtx_equal_p (dst
, src2
))
6680 matching_memory
= 2;
6682 dst
= gen_reg_rtx (mode
);
6685 /* Both source operands cannot be in memory. */
6686 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
6688 if (matching_memory
!= 2)
6689 src2
= force_reg (mode
, src2
);
6691 src1
= force_reg (mode
, src1
);
6694 /* If the operation is not commutable, source 1 cannot be a constant
6695 or non-matching memory. */
6696 if ((CONSTANT_P (src1
)
6697 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
6698 && GET_RTX_CLASS (code
) != 'c')
6699 src1
= force_reg (mode
, src1
);
6701 /* If optimizing, copy to regs to improve CSE */
6702 if (optimize
&& ! no_new_pseudos
)
6704 if (GET_CODE (dst
) == MEM
)
6705 dst
= gen_reg_rtx (mode
);
6706 if (GET_CODE (src1
) == MEM
)
6707 src1
= force_reg (mode
, src1
);
6708 if (GET_CODE (src2
) == MEM
)
6709 src2
= force_reg (mode
, src2
);
6712 /* Emit the instruction. */
6714 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
6715 if (reload_in_progress
)
6717 /* Reload doesn't know about the flags register, and doesn't know that
6718 it doesn't want to clobber it. We can only do this with PLUS. */
6725 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
6726 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
6729 /* Fix up the destination if needed. */
6730 if (dst
!= operands
[0])
6731 emit_move_insn (operands
[0], dst
);
6734 /* Return TRUE or FALSE depending on whether the binary operator meets the
6735 appropriate constraints. */
6738 ix86_binary_operator_ok (code
, mode
, operands
)
6740 enum machine_mode mode ATTRIBUTE_UNUSED
;
/* operands[0] is the destination, operands[1] and operands[2] the
   sources.  The early `return' statements after each failing test are
   elided in this extract.  */
6743 /* Both source operands cannot be in memory. */
6744 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
6746 /* If the operation is not commutable, source 1 cannot be a constant. */
6747 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
6749 /* If the destination is memory, we must have a matching source operand. */
6750 if (GET_CODE (operands
[0]) == MEM
6751 && ! (rtx_equal_p (operands
[0], operands
[1])
6752 || (GET_RTX_CLASS (code
) == 'c'
6753 && rtx_equal_p (operands
[0], operands
[2]))))
6755 /* If the operation is not commutable and the source 1 is memory, we must
6756 have a matching destination. */
6757 if (GET_CODE (operands
[1]) == MEM
6758 && GET_RTX_CLASS (code
) != 'c'
6759 && ! rtx_equal_p (operands
[0], operands
[1]))
6764 /* Attempt to expand a unary operator. Make the expansion closer to the
6765 actual machine, then just general_operand, which will allow 2 separate
6766 memory references (one output, one input) in a single insn. */
6769 ix86_expand_unary_operator (code
, mode
, operands
)
6771 enum machine_mode mode
;
6774 int matching_memory
;
6775 rtx src
, dst
, op
, clob
;
6780 /* If the destination is memory, and we do not have matching source
6781 operands, do things in registers. */
6782 matching_memory
= 0;
6783 if (GET_CODE (dst
) == MEM
)
6785 if (rtx_equal_p (dst
, src
))
6786 matching_memory
= 1;
6788 dst
= gen_reg_rtx (mode
);
6791 /* When source operand is memory, destination must match. */
6792 if (!matching_memory
&& GET_CODE (src
) == MEM
)
6793 src
= force_reg (mode
, src
);
6795 /* If optimizing, copy to regs to improve CSE */
6796 if (optimize
&& ! no_new_pseudos
)
6798 if (GET_CODE (dst
) == MEM
)
6799 dst
= gen_reg_rtx (mode
);
6800 if (GET_CODE (src
) == MEM
)
6801 src
= force_reg (mode
, src
);
6804 /* Emit the instruction. */
6806 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
6807 if (reload_in_progress
|| code
== NOT
)
6809 /* Reload doesn't know about the flags register, and doesn't know that
6810 it doesn't want to clobber it. */
6817 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
6818 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
6821 /* Fix up the destination if needed. */
6822 if (dst
!= operands
[0])
6823 emit_move_insn (operands
[0], dst
);
6826 /* Return TRUE or FALSE depending on whether the unary operator meets the
6827 appropriate constraints. */
6830 ix86_unary_operator_ok (code
, mode
, operands
)
6831 enum rtx_code code ATTRIBUTE_UNUSED
;
6832 enum machine_mode mode ATTRIBUTE_UNUSED
;
6833 rtx operands
[2] ATTRIBUTE_UNUSED
;
/* operands[0] is the destination, operands[1] the source; the failing
   `return' statement is elided in this extract.  */
6835 /* If one of operands is memory, source and destination must match. */
6836 if ((GET_CODE (operands
[0]) == MEM
6837 || GET_CODE (operands
[1]) == MEM
)
6838 && ! rtx_equal_p (operands
[0], operands
[1]))
6843 /* Return TRUE or FALSE depending on whether the first SET in INSN
6844 has source and destination with matching CC modes, and that the
6845 CC mode is at least as constrained as REQ_MODE. */
6848 ix86_match_ccmode (insn
, req_mode
)
6850 enum machine_mode req_mode
;
6853 enum machine_mode set_mode
;
6855 set
= PATTERN (insn
);
6856 if (GET_CODE (set
) == PARALLEL
)
6857 set
= XVECEXP (set
, 0, 0);
6858 if (GET_CODE (set
) != SET
)
6860 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
6863 set_mode
= GET_MODE (SET_DEST (set
));
6867 if (req_mode
!= CCNOmode
6868 && (req_mode
!= CCmode
6869 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
6873 if (req_mode
== CCGCmode
)
6877 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
6881 if (req_mode
== CCZmode
)
6891 return (GET_MODE (SET_SRC (set
)) == set_mode
);
6894 /* Generate insn patterns to do an integer compare of OPERANDS. */
6897 ix86_expand_int_compare (code
, op0
, op1
)
6901 enum machine_mode cmpmode
;
6904 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
6905 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
6907 /* This is very simple, but making the interface the same as in the
6908 FP case makes the rest of the code easier. */
6909 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
6910 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
6912 /* Return the test that should be put into the flags user, i.e.
6913 the bcc, scc, or cmov instruction. */
6914 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
6917 /* Figure out whether to use ordered or unordered fp comparisons.
6918 Return the appropriate mode to use. */
6921 ix86_fp_compare_mode (code
)
6922 enum rtx_code code ATTRIBUTE_UNUSED
;
6924 /* ??? In order to make all comparisons reversible, we do all comparisons
6925 non-trapping when compiling for IEEE. Once gcc is able to distinguish
6926 all forms trapping and nontrapping comparisons, we can make inequality
6927 comparisons trapping again, since it results in better code when using
6928 FCOM based compares. */
/* CCFPUmode selects the unordered (non-trapping) compare patterns;
   CCFPmode the ordered ones.  */
6929 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
6933 ix86_cc_mode (code
, op0
, op1
)
6937 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
6938 return ix86_fp_compare_mode (code
);
6941 /* Only zero flag is needed. */
6943 case NE
: /* ZF!=0 */
6945 /* Codes needing carry flag. */
6946 case GEU
: /* CF=0 */
6947 case GTU
: /* CF=0 & ZF=0 */
6948 case LTU
: /* CF=1 */
6949 case LEU
: /* CF=1 | ZF=1 */
6951 /* Codes possibly doable only with sign flag when
6952 comparing against zero. */
6953 case GE
: /* SF=OF or SF=0 */
6954 case LT
: /* SF<>OF or SF=1 */
6955 if (op1
== const0_rtx
)
6958 /* For other cases Carry flag is not required. */
6960 /* Codes doable only with sign flag when comparing
6961 against zero, but we miss jump instruction for it
6962 so we need to use relational tests agains overflow
6963 that thus needs to be zero. */
6964 case GT
: /* ZF=0 & SF=OF */
6965 case LE
: /* ZF=1 | SF<>OF */
6966 if (op1
== const0_rtx
)
6975 /* Return true if we should use an FCOMI instruction for this fp comparison. */
6978 ix86_use_fcomi_compare (code
)
6979 enum rtx_code code ATTRIBUTE_UNUSED
;
6981 enum rtx_code swapped_code
= swap_condition (code
);
/* Use fcomi when its cost equals the overall best comparison cost,
   for either operand order (the comparison may be emitted swapped).  */
6982 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
6983 || (ix86_fp_comparison_cost (swapped_code
)
6984 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
6987 /* Swap, force into registers, or otherwise massage the two operands
6988 to a fp comparison. The operands are updated in place; the new
6989 comparsion code is returned. */
6991 static enum rtx_code
6992 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
6996 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
6997 rtx op0
= *pop0
, op1
= *pop1
;
6998 enum machine_mode op_mode
= GET_MODE (op0
);
6999 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7001 /* All of the unordered compare instructions only work on registers.
7002 The same is true of the XFmode compare instructions. The same is
7003 true of the fcomi compare instructions. */
7006 && (fpcmp_mode
== CCFPUmode
7007 || op_mode
== XFmode
7008 || op_mode
== TFmode
7009 || ix86_use_fcomi_compare (code
)))
7011 op0
= force_reg (op_mode
, op0
);
7012 op1
= force_reg (op_mode
, op1
);
7016 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7017 things around if they appear profitable, otherwise force op0
7020 if (standard_80387_constant_p (op0
) == 0
7021 || (GET_CODE (op0
) == MEM
7022 && ! (standard_80387_constant_p (op1
) == 0
7023 || GET_CODE (op1
) == MEM
)))
7026 tmp
= op0
, op0
= op1
, op1
= tmp
;
7027 code
= swap_condition (code
);
7030 if (GET_CODE (op0
) != REG
)
7031 op0
= force_reg (op_mode
, op0
);
7033 if (CONSTANT_P (op1
))
7035 if (standard_80387_constant_p (op1
))
7036 op1
= force_reg (op_mode
, op1
);
7038 op1
= validize_mem (force_const_mem (op_mode
, op1
));
7042 /* Try to rearrange the comparison to make it cheaper. */
7043 if (ix86_fp_comparison_cost (code
)
7044 > ix86_fp_comparison_cost (swap_condition (code
))
7045 && (GET_CODE (op0
) == REG
|| !reload_completed
))
7048 tmp
= op0
, op0
= op1
, op1
= tmp
;
7049 code
= swap_condition (code
);
7050 if (GET_CODE (op0
) != REG
)
7051 op0
= force_reg (op_mode
, op0
);
7059 /* Convert comparison codes we use to represent FP comparison to integer
7060 code that will result in proper branch. Return UNKNOWN if no such code
7062 static enum rtx_code
7063 ix86_fp_compare_code_to_integer (code
)
7093 /* Split comparison code CODE into comparisons we can do using branch
7094 instructions. BYPASS_CODE is comparison code for branch that will
7095 branch around FIRST_CODE and SECOND_CODE. If some of branches
7096 is not required, set value to NIL.
7097 We never require more than two branches. */
7099 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
7100 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
7106 /* The fcomi comparison sets flags as follows:
7116 case GT
: /* GTU - CF=0 & ZF=0 */
7117 case GE
: /* GEU - CF=0 */
7118 case ORDERED
: /* PF=0 */
7119 case UNORDERED
: /* PF=1 */
7120 case UNEQ
: /* EQ - ZF=1 */
7121 case UNLT
: /* LTU - CF=1 */
7122 case UNLE
: /* LEU - CF=1 | ZF=1 */
7123 case LTGT
: /* EQ - ZF=0 */
7125 case LT
: /* LTU - CF=1 - fails on unordered */
7127 *bypass_code
= UNORDERED
;
7129 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
7131 *bypass_code
= UNORDERED
;
7133 case EQ
: /* EQ - ZF=1 - fails on unordered */
7135 *bypass_code
= UNORDERED
;
7137 case NE
: /* NE - ZF=0 - fails on unordered */
7139 *second_code
= UNORDERED
;
7141 case UNGE
: /* GEU - CF=0 - fails on unordered */
7143 *second_code
= UNORDERED
;
7145 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
7147 *second_code
= UNORDERED
;
7152 if (!TARGET_IEEE_FP
)
7159 /* Return cost of comparison done fcom + arithmetics operations on AX.
7160 All following functions do use number of instructions as an cost metrics.
7161 In future this should be tweaked to compute bytes for optimize_size and
7162 take into account performance of various instructions on various CPUs. */
7164 ix86_fp_comparison_arithmetics_cost (code
)
7167 if (!TARGET_IEEE_FP
)
7169 /* The cost of code output by ix86_expand_fp_compare. */
7197 /* Return cost of comparison done using fcomi operation.
7198 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7200 ix86_fp_comparison_fcomi_cost (code
)
7203 enum rtx_code bypass_code
, first_code
, second_code
;
7204 /* Return arbitrarily high cost when the instruction is not supported -
7205 this prevents gcc from using it. */
7208 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
/* Base cost of 2, plus 1 when the split comparison needs an extra
   bypass or second branch.  */
7209 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
7212 /* Return cost of comparison done using sahf operation.
7213 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7215 ix86_fp_comparison_sahf_cost (code
)
7218 enum rtx_code bypass_code
, first_code
, second_code
;
7219 /* Return arbitrarily high cost when the instruction is not preferred -
7220 this keeps gcc from using it. */
/* sahf is only considered when the target prefers it or when
   optimizing for size; the early `return' here is elided.  */
7221 if (!TARGET_USE_SAHF
&& !optimize_size
)
7223 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
/* Base cost of 3, plus 1 when an extra bypass or second branch is
   needed.  */
7224 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
7227 /* Compute cost of the comparison done using any method.
7228 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7230 ix86_fp_comparison_cost (code
)
7233 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
7236 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
7237 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
/* Take the minimum of the arithmetics, sahf and fcomi strategies;
   the assignments inside the two `if's and the final return are
   elided in this extract.  */
7239 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
7240 if (min
> sahf_cost
)
7242 if (min
> fcomi_cost
)
7247 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7250 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
7252 rtx op0
, op1
, scratch
;
7256 enum machine_mode fpcmp_mode
, intcmp_mode
;
7258 int cost
= ix86_fp_comparison_cost (code
);
7259 enum rtx_code bypass_code
, first_code
, second_code
;
7261 fpcmp_mode
= ix86_fp_compare_mode (code
);
7262 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
7265 *second_test
= NULL_RTX
;
7267 *bypass_test
= NULL_RTX
;
7269 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7271 /* Do fcomi/sahf based test when profitable. */
7272 if ((bypass_code
== NIL
|| bypass_test
)
7273 && (second_code
== NIL
|| second_test
)
7274 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
7278 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7279 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
7285 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7286 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
7288 scratch
= gen_reg_rtx (HImode
);
7289 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
7290 emit_insn (gen_x86_sahf_1 (scratch
));
7293 /* The FP codes work out to act like unsigned. */
7294 intcmp_mode
= fpcmp_mode
;
7296 if (bypass_code
!= NIL
)
7297 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
7298 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7300 if (second_code
!= NIL
)
7301 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
7302 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7307 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7308 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7309 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), 9);
7311 scratch
= gen_reg_rtx (HImode
);
7312 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
7314 /* In the unordered case, we have to check C2 for NaN's, which
7315 doesn't happen to work out to anything nice combination-wise.
7316 So do some bit twiddling on the value we've got in AH to come
7317 up with an appropriate set of condition codes. */
7319 intcmp_mode
= CCNOmode
;
7324 if (code
== GT
|| !TARGET_IEEE_FP
)
7326 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
7331 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7332 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
7333 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
7334 intcmp_mode
= CCmode
;
7340 if (code
== LT
&& TARGET_IEEE_FP
)
7342 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7343 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
7344 intcmp_mode
= CCmode
;
7349 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
7355 if (code
== GE
|| !TARGET_IEEE_FP
)
7357 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
7362 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7363 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
7370 if (code
== LE
&& TARGET_IEEE_FP
)
7372 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7373 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
7374 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
7375 intcmp_mode
= CCmode
;
7380 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
7386 if (code
== EQ
&& TARGET_IEEE_FP
)
7388 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7389 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
7390 intcmp_mode
= CCmode
;
7395 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
7402 if (code
== NE
&& TARGET_IEEE_FP
)
7404 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7405 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
7411 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
7417 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
7421 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
7430 /* Return the test that should be put into the flags user, i.e.
7431 the bcc, scc, or cmov instruction. */
7432 return gen_rtx_fmt_ee (code
, VOIDmode
,
7433 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7438 ix86_expand_compare (code
, second_test
, bypass_test
)
7440 rtx
*second_test
, *bypass_test
;
7443 op0
= ix86_compare_op0
;
7444 op1
= ix86_compare_op1
;
7447 *second_test
= NULL_RTX
;
7449 *bypass_test
= NULL_RTX
;
7451 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7452 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
7453 second_test
, bypass_test
);
7455 ret
= ix86_expand_int_compare (code
, op0
, op1
);
7460 /* Return true if the CODE will result in nontrivial jump sequence. */
7462 ix86_fp_jump_nontrivial_p (code
)
7465 enum rtx_code bypass_code
, first_code
, second_code
;
7468 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
/* The sequence is nontrivial when the split comparison requires a
   bypass branch or a second branch in addition to the first.  */
7469 return bypass_code
!= NIL
|| second_code
!= NIL
;
7473 ix86_expand_branch (code
, label
)
7479 switch (GET_MODE (ix86_compare_op0
))
7485 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
7486 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7487 gen_rtx_LABEL_REF (VOIDmode
, label
),
7489 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
7499 enum rtx_code bypass_code
, first_code
, second_code
;
7501 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
7504 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7506 /* Check whether we will use the natural sequence with one jump. If
7507 so, we can expand jump early. Otherwise delay expansion by
7508 creating compound insn to not confuse optimizers. */
7509 if (bypass_code
== NIL
&& second_code
== NIL
7512 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
7513 gen_rtx_LABEL_REF (VOIDmode
, label
),
7518 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
7519 ix86_compare_op0
, ix86_compare_op1
);
7520 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7521 gen_rtx_LABEL_REF (VOIDmode
, label
),
7523 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
7525 use_fcomi
= ix86_use_fcomi_compare (code
);
7526 vec
= rtvec_alloc (3 + !use_fcomi
);
7527 RTVEC_ELT (vec
, 0) = tmp
;
7529 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
7531 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
7534 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
7536 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
7544 /* Expand DImode branch into multiple compare+branch. */
7546 rtx lo
[2], hi
[2], label2
;
7547 enum rtx_code code1
, code2
, code3
;
7549 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
7551 tmp
= ix86_compare_op0
;
7552 ix86_compare_op0
= ix86_compare_op1
;
7553 ix86_compare_op1
= tmp
;
7554 code
= swap_condition (code
);
7556 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
7557 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
7559 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7560 avoid two branches. This costs one extra insn, so disable when
7561 optimizing for size. */
7563 if ((code
== EQ
|| code
== NE
)
7565 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
7570 if (hi
[1] != const0_rtx
)
7571 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
7572 NULL_RTX
, 0, OPTAB_WIDEN
);
7575 if (lo
[1] != const0_rtx
)
7576 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
7577 NULL_RTX
, 0, OPTAB_WIDEN
);
7579 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
7580 NULL_RTX
, 0, OPTAB_WIDEN
);
7582 ix86_compare_op0
= tmp
;
7583 ix86_compare_op1
= const0_rtx
;
7584 ix86_expand_branch (code
, label
);
7588 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7589 op1 is a constant and the low word is zero, then we can just
7590 examine the high word. */
7592 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
7595 case LT
: case LTU
: case GE
: case GEU
:
7596 ix86_compare_op0
= hi
[0];
7597 ix86_compare_op1
= hi
[1];
7598 ix86_expand_branch (code
, label
);
7604 /* Otherwise, we need two or three jumps. */
7606 label2
= gen_label_rtx ();
7609 code2
= swap_condition (code
);
7610 code3
= unsigned_condition (code
);
7614 case LT
: case GT
: case LTU
: case GTU
:
7617 case LE
: code1
= LT
; code2
= GT
; break;
7618 case GE
: code1
= GT
; code2
= LT
; break;
7619 case LEU
: code1
= LTU
; code2
= GTU
; break;
7620 case GEU
: code1
= GTU
; code2
= LTU
; break;
7622 case EQ
: code1
= NIL
; code2
= NE
; break;
7623 case NE
: code2
= NIL
; break;
7631 * if (hi(a) < hi(b)) goto true;
7632 * if (hi(a) > hi(b)) goto false;
7633 * if (lo(a) < lo(b)) goto true;
7637 ix86_compare_op0
= hi
[0];
7638 ix86_compare_op1
= hi
[1];
7641 ix86_expand_branch (code1
, label
);
7643 ix86_expand_branch (code2
, label2
);
7645 ix86_compare_op0
= lo
[0];
7646 ix86_compare_op1
= lo
[1];
7647 ix86_expand_branch (code3
, label
);
7650 emit_label (label2
);
7659 /* Split branch based on floating point condition. */
7661 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
7663 rtx op1
, op2
, target1
, target2
, tmp
;
7666 rtx label
= NULL_RTX
;
7668 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
7671 if (target2
!= pc_rtx
)
7674 code
= reverse_condition_maybe_unordered (code
);
7679 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
7680 tmp
, &second
, &bypass
);
7682 if (split_branch_probability
>= 0)
7684 /* Distribute the probabilities across the jumps.
7685 Assume the BYPASS and SECOND to be always test
7687 probability
= split_branch_probability
;
7689 /* Value of 1 is low enought to make no need for probability
7690 to be updated. Later we may run some experiments and see
7691 if unordered values are more frequent in practice. */
7693 bypass_probability
= 1;
7695 second_probability
= 1;
7697 if (bypass
!= NULL_RTX
)
7699 label
= gen_label_rtx ();
7700 i
= emit_jump_insn (gen_rtx_SET
7702 gen_rtx_IF_THEN_ELSE (VOIDmode
,
7704 gen_rtx_LABEL_REF (VOIDmode
,
7707 if (bypass_probability
>= 0)
7709 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7710 GEN_INT (bypass_probability
),
7713 i
= emit_jump_insn (gen_rtx_SET
7715 gen_rtx_IF_THEN_ELSE (VOIDmode
,
7716 condition
, target1
, target2
)));
7717 if (probability
>= 0)
7719 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7720 GEN_INT (probability
),
7722 if (second
!= NULL_RTX
)
7724 i
= emit_jump_insn (gen_rtx_SET
7726 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
7728 if (second_probability
>= 0)
7730 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7731 GEN_INT (second_probability
),
7734 if (label
!= NULL_RTX
)
7739 ix86_expand_setcc (code
, dest
)
7743 rtx ret
, tmp
, tmpreg
;
7744 rtx second_test
, bypass_test
;
7747 if (GET_MODE (ix86_compare_op0
) == DImode
7749 return 0; /* FAIL */
7751 /* Three modes of generation:
7752 0 -- destination does not overlap compare sources:
7753 clear dest first, emit strict_low_part setcc.
7754 1 -- destination does overlap compare sources:
7755 emit subreg setcc, zero extend.
7756 2 -- destination is in QImode:
7759 We don't use mode 0 early in compilation because it confuses CSE.
7760 There are peepholes to turn mode 1 into mode 0 if things work out
7761 nicely after reload. */
7763 type
= cse_not_expected
? 0 : 1;
7765 if (GET_MODE (dest
) == QImode
)
7767 else if (reg_overlap_mentioned_p (dest
, ix86_compare_op0
)
7768 || reg_overlap_mentioned_p (dest
, ix86_compare_op1
))
7772 emit_move_insn (dest
, const0_rtx
);
7774 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
7775 PUT_MODE (ret
, QImode
);
7781 tmp
= gen_lowpart (QImode
, dest
);
7783 tmp
= gen_rtx_STRICT_LOW_PART (VOIDmode
, tmp
);
7787 if (!cse_not_expected
)
7788 tmp
= gen_reg_rtx (QImode
);
7790 tmp
= gen_lowpart (QImode
, dest
);
7794 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
7795 if (bypass_test
|| second_test
)
7797 rtx test
= second_test
;
7799 rtx tmp2
= gen_reg_rtx (QImode
);
7806 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
7808 PUT_MODE (test
, QImode
);
7809 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
7812 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
7814 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
7821 tmp
= gen_rtx_ZERO_EXTEND (GET_MODE (dest
), tmp
);
7822 tmp
= gen_rtx_SET (VOIDmode
, dest
, tmp
);
7823 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7824 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7828 return 1; /* DONE */
7832 ix86_expand_int_movcc (operands
)
7835 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
7836 rtx compare_seq
, compare_op
;
7837 rtx second_test
, bypass_test
;
7839 /* When the compare code is not LTU or GEU, we can not use sbbl case.
7840 In case comparsion is done with immediate, we can convert it to LTU or
7841 GEU by altering the integer. */
7843 if ((code
== LEU
|| code
== GTU
)
7844 && GET_CODE (ix86_compare_op1
) == CONST_INT
7845 && GET_MODE (operands
[0]) != HImode
7846 && (unsigned int)INTVAL (ix86_compare_op1
) != 0xffffffff
7847 && GET_CODE (operands
[2]) == CONST_INT
7848 && GET_CODE (operands
[3]) == CONST_INT
)
7854 ix86_compare_op1
= GEN_INT (INTVAL (ix86_compare_op1
) + 1);
7858 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
7859 compare_seq
= gen_sequence ();
7862 compare_code
= GET_CODE (compare_op
);
7864 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7865 HImode insns, we'd be swallowed in word prefix ops. */
7867 if (GET_MODE (operands
[0]) != HImode
7868 && (GET_MODE (operands
[0]) != DImode
|| TARGET_64BIT
)
7869 && GET_CODE (operands
[2]) == CONST_INT
7870 && GET_CODE (operands
[3]) == CONST_INT
)
7872 rtx out
= operands
[0];
7873 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
7874 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
7877 if ((compare_code
== LTU
|| compare_code
== GEU
)
7878 && !second_test
&& !bypass_test
)
7881 /* Detect overlap between destination and compare sources. */
7884 /* To simplify rest of code, restrict to the GEU case. */
7885 if (compare_code
== LTU
)
7890 compare_code
= reverse_condition (compare_code
);
7891 code
= reverse_condition (code
);
7895 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
7896 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
7897 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
7899 emit_insn (compare_seq
);
7900 if (GET_MODE (tmp
) == DImode
)
7901 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
7903 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
7916 if (GET_MODE (tmp
) == DImode
)
7917 emit_insn (gen_adddi3 (tmp
, tmp
, GEN_INT (ct
)));
7919 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (ct
)));
7931 if (GET_MODE (tmp
) == DImode
)
7932 emit_insn (gen_iordi3 (tmp
, tmp
, GEN_INT (ct
)));
7934 emit_insn (gen_iorsi3 (tmp
, tmp
, GEN_INT (ct
)));
7936 else if (diff
== -1 && ct
)
7946 if (GET_MODE (tmp
) == DImode
)
7948 emit_insn (gen_one_cmpldi2 (tmp
, tmp
));
7950 emit_insn (gen_adddi3 (tmp
, tmp
, GEN_INT (cf
)));
7954 emit_insn (gen_one_cmplsi2 (tmp
, tmp
));
7956 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (cf
)));
7964 * andl cf - ct, dest
7969 if (GET_MODE (tmp
) == DImode
)
7971 emit_insn (gen_anddi3 (tmp
, tmp
, GEN_INT (trunc_int_for_mode
7972 (cf
- ct
, DImode
))));
7974 emit_insn (gen_adddi3 (tmp
, tmp
, GEN_INT (ct
)));
7978 emit_insn (gen_andsi3 (tmp
, tmp
, GEN_INT (trunc_int_for_mode
7979 (cf
- ct
, SImode
))));
7981 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (ct
)));
7986 emit_move_insn (out
, tmp
);
7988 return 1; /* DONE */
7995 tmp
= ct
, ct
= cf
, cf
= tmp
;
7997 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
7999 /* We may be reversing unordered compare to normal compare, that
8000 is not valid in general (we may convert non-trapping condition
8001 to trapping one), however on i386 we currently emit all
8002 comparisons unordered. */
8003 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8004 code
= reverse_condition_maybe_unordered (code
);
8008 compare_code
= reverse_condition (compare_code
);
8009 code
= reverse_condition (code
);
8012 if (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8013 || diff
== 3 || diff
== 5 || diff
== 9)
8019 * lea cf(dest*(ct-cf)),dest
8023 * This also catches the degenerate setcc-only case.
8029 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8030 ix86_compare_op1
, VOIDmode
, 0, 1);
8033 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8034 done in proper mode to match. */
8041 tmp
= gen_rtx_MULT (GET_MODE (out
), out1
, GEN_INT (diff
& ~1));
8045 tmp
= gen_rtx_PLUS (GET_MODE (out
), tmp
, out1
);
8051 tmp
= gen_rtx_PLUS (GET_MODE (out
), tmp
, GEN_INT (cf
));
8055 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
8061 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
8062 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
8064 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
8065 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8069 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
8071 if (out
!= operands
[0])
8072 emit_move_insn (operands
[0], out
);
8074 return 1; /* DONE */
8078 * General case: Jumpful:
8079 * xorl dest,dest cmpl op1, op2
8080 * cmpl op1, op2 movl ct, dest
8082 * decl dest movl cf, dest
8083 * andl (cf-ct),dest 1:
8088 * This is reasonably steep, but branch mispredict costs are
8089 * high on modern cpus, so consider failing only if optimizing
8092 * %%% Parameterize branch_cost on the tuning architecture, then
8093 * use that. The 80386 couldn't care less about mispredicts.
8096 if (!optimize_size
&& !TARGET_CMOVE
)
8102 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8104 /* We may be reversing unordered compare to normal compare,
8105 that is not valid in general (we may convert non-trapping
8106 condition to trapping one), however on i386 we currently
8107 emit all comparisons unordered. */
8108 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8109 code
= reverse_condition_maybe_unordered (code
);
8113 compare_code
= reverse_condition (compare_code
);
8114 code
= reverse_condition (code
);
8118 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8119 ix86_compare_op1
, VOIDmode
, 0, 1);
8121 emit_insn (gen_addsi3 (out
, out
, constm1_rtx
));
8122 emit_insn (gen_andsi3 (out
, out
, GEN_INT (trunc_int_for_mode
8123 (cf
- ct
, SImode
))));
8125 emit_insn (gen_addsi3 (out
, out
, GEN_INT (ct
)));
8126 if (out
!= operands
[0])
8127 emit_move_insn (operands
[0], out
);
8129 return 1; /* DONE */
8135 /* Try a few things more with specific constants and a variable. */
8138 rtx var
, orig_out
, out
, tmp
;
8141 return 0; /* FAIL */
8143 /* If one of the two operands is an interesting constant, load a
8144 constant with the above and mask it in with a logical operation. */
8146 if (GET_CODE (operands
[2]) == CONST_INT
)
8149 if (INTVAL (operands
[2]) == 0)
8150 operands
[3] = constm1_rtx
, op
= and_optab
;
8151 else if (INTVAL (operands
[2]) == -1)
8152 operands
[3] = const0_rtx
, op
= ior_optab
;
8154 return 0; /* FAIL */
8156 else if (GET_CODE (operands
[3]) == CONST_INT
)
8159 if (INTVAL (operands
[3]) == 0)
8160 operands
[2] = constm1_rtx
, op
= and_optab
;
8161 else if (INTVAL (operands
[3]) == -1)
8162 operands
[2] = const0_rtx
, op
= ior_optab
;
8164 return 0; /* FAIL */
8167 return 0; /* FAIL */
8169 orig_out
= operands
[0];
8170 tmp
= gen_reg_rtx (GET_MODE (orig_out
));
8173 /* Recurse to get the constant loaded. */
8174 if (ix86_expand_int_movcc (operands
) == 0)
8175 return 0; /* FAIL */
8177 /* Mask in the interesting variable. */
8178 out
= expand_binop (GET_MODE (orig_out
), op
, var
, tmp
, orig_out
, 0,
8180 if (out
!= orig_out
)
8181 emit_move_insn (orig_out
, out
);
8183 return 1; /* DONE */
8187 * For comparison with above,
8197 if (! nonimmediate_operand (operands
[2], GET_MODE (operands
[0])))
8198 operands
[2] = force_reg (GET_MODE (operands
[0]), operands
[2]);
8199 if (! nonimmediate_operand (operands
[3], GET_MODE (operands
[0])))
8200 operands
[3] = force_reg (GET_MODE (operands
[0]), operands
[3]);
8202 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
8204 rtx tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8205 emit_move_insn (tmp
, operands
[3]);
8208 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
8210 rtx tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8211 emit_move_insn (tmp
, operands
[2]);
8214 if (! register_operand (operands
[2], VOIDmode
)
8215 && ! register_operand (operands
[3], VOIDmode
))
8216 operands
[2] = force_reg (GET_MODE (operands
[0]), operands
[2]);
8218 emit_insn (compare_seq
);
8219 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8220 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8221 compare_op
, operands
[2],
8224 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8225 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8230 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8231 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8236 return 1; /* DONE */
8240 ix86_expand_fp_movcc (operands
)
8245 rtx compare_op
, second_test
, bypass_test
;
8247 /* For SF/DFmode conditional moves based on comparisons
8248 in same mode, we may want to use SSE min/max instructions. */
8249 if (((TARGET_SSE
&& GET_MODE (operands
[0]) == SFmode
)
8250 || (TARGET_SSE2
&& GET_MODE (operands
[0]) == DFmode
))
8251 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
8252 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
8254 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
8255 /* We may be called from the post-reload splitter. */
8256 && (!REG_P (operands
[0])
8257 || SSE_REG_P (operands
[0])
8258 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
8260 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
8261 code
= GET_CODE (operands
[1]);
8263 /* See if we have (cross) match between comparison operands and
8264 conditional move operands. */
8265 if (rtx_equal_p (operands
[2], op1
))
8270 code
= reverse_condition_maybe_unordered (code
);
8272 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
8274 /* Check for min operation. */
8277 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
8278 if (memory_operand (op0
, VOIDmode
))
8279 op0
= force_reg (GET_MODE (operands
[0]), op0
);
8280 if (GET_MODE (operands
[0]) == SFmode
)
8281 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
8283 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
8286 /* Check for max operation. */
8289 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
8290 if (memory_operand (op0
, VOIDmode
))
8291 op0
= force_reg (GET_MODE (operands
[0]), op0
);
8292 if (GET_MODE (operands
[0]) == SFmode
)
8293 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
8295 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
8299 /* Manage condition to be sse_comparison_operator. In case we are
8300 in non-ieee mode, try to canonicalize the destination operand
8301 to be first in the comparison - this helps reload to avoid extra
8303 if (!sse_comparison_operator (operands
[1], VOIDmode
)
8304 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
8306 rtx tmp
= ix86_compare_op0
;
8307 ix86_compare_op0
= ix86_compare_op1
;
8308 ix86_compare_op1
= tmp
;
8309 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
8310 VOIDmode
, ix86_compare_op0
,
8313 /* Similary try to manage result to be first operand of conditional
8314 move. We also don't support the NE comparison on SSE, so try to
8316 if ((rtx_equal_p (operands
[0], operands
[3])
8317 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
8318 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
8320 rtx tmp
= operands
[2];
8321 operands
[2] = operands
[3];
8323 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8324 (GET_CODE (operands
[1])),
8325 VOIDmode
, ix86_compare_op0
,
8328 if (GET_MODE (operands
[0]) == SFmode
)
8329 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
8330 operands
[2], operands
[3],
8331 ix86_compare_op0
, ix86_compare_op1
));
8333 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
8334 operands
[2], operands
[3],
8335 ix86_compare_op0
, ix86_compare_op1
));
8339 /* The floating point conditional move instructions don't directly
8340 support conditions resulting from a signed integer comparison. */
8342 code
= GET_CODE (operands
[1]);
8343 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8345 /* The floating point conditional move instructions don't directly
8346 support signed integer comparisons. */
8348 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
8350 if (second_test
!= NULL
|| bypass_test
!= NULL
)
8352 tmp
= gen_reg_rtx (QImode
);
8353 ix86_expand_setcc (code
, tmp
);
8355 ix86_compare_op0
= tmp
;
8356 ix86_compare_op1
= const0_rtx
;
8357 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8359 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
8361 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8362 emit_move_insn (tmp
, operands
[3]);
8365 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
8367 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8368 emit_move_insn (tmp
, operands
[2]);
8372 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8373 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8378 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8379 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8384 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8385 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8393 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8394 works for floating pointer parameters and nonoffsetable memories.
8395 For pushes, it returns just stack offsets; the values will be saved
8396 in the right order. Maximally three parts are generated. */
8399 ix86_split_to_parts (operand
, parts
, mode
)
8402 enum machine_mode mode
;
8407 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
8409 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
8411 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
8413 if (size
< 2 || size
> 3)
8416 /* Optimize constant pool reference to immediates. This is used by fp moves,
8417 that force all constants to memory to allow combining. */
8419 if (GET_CODE (operand
) == MEM
8420 && GET_CODE (XEXP (operand
, 0)) == SYMBOL_REF
8421 && CONSTANT_POOL_ADDRESS_P (XEXP (operand
, 0)))
8422 operand
= get_pool_constant (XEXP (operand
, 0));
8424 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
8426 /* The only non-offsetable memories we handle are pushes. */
8427 if (! push_operand (operand
, VOIDmode
))
8430 operand
= copy_rtx (operand
);
8431 PUT_MODE (operand
, Pmode
);
8432 parts
[0] = parts
[1] = parts
[2] = operand
;
8434 else if (!TARGET_64BIT
)
8437 split_di (&operand
, 1, &parts
[0], &parts
[1]);
8440 if (REG_P (operand
))
8442 if (!reload_completed
)
8444 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
8445 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
8447 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
8449 else if (offsettable_memref_p (operand
))
8451 operand
= adjust_address (operand
, SImode
, 0);
8453 parts
[1] = adjust_address (operand
, SImode
, 4);
8455 parts
[2] = adjust_address (operand
, SImode
, 8);
8457 else if (GET_CODE (operand
) == CONST_DOUBLE
)
8462 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
8467 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
8468 parts
[2] = GEN_INT (l
[2]);
8471 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
8476 parts
[1] = GEN_INT (l
[1]);
8477 parts
[0] = GEN_INT (l
[0]);
8485 if (mode
== XFmode
|| mode
== TFmode
)
8487 if (REG_P (operand
))
8489 if (!reload_completed
)
8491 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
8492 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
8494 else if (offsettable_memref_p (operand
))
8496 operand
= adjust_address (operand
, DImode
, 0);
8498 parts
[1] = adjust_address (operand
, SImode
, 8);
8500 else if (GET_CODE (operand
) == CONST_DOUBLE
)
8505 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
8506 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
8507 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8508 if (HOST_BITS_PER_WIDE_INT
>= 64)
8509 parts
[0] = GEN_INT (l
[0] + ((l
[1] << 31) << 1));
8511 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
8512 parts
[1] = GEN_INT (l
[2]);
8522 /* Emit insns to perform a move or push of DI, DF, and XF values.
8523 Return false when normal moves are needed; true when all required
8524 insns have been emitted. Operands 2-4 contain the input values
8525 int the correct order; operands 5-7 contain the output values. */
8528 ix86_split_long_move (operands
)
8535 enum machine_mode mode
= GET_MODE (operands
[0]);
8537 /* The DFmode expanders may ask us to move double.
8538 For 64bit target this is single move. By hiding the fact
8539 here we simplify i386.md splitters. */
8540 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
8542 /* Optimize constant pool reference to immediates. This is used by fp moves,
8543 that force all constants to memory to allow combining. */
8545 if (GET_CODE (operands
[1]) == MEM
8546 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
8547 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
8548 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
8549 if (push_operand (operands
[0], VOIDmode
))
8551 operands
[0] = copy_rtx (operands
[0]);
8552 PUT_MODE (operands
[0], Pmode
);
8555 operands
[0] = gen_lowpart (DImode
, operands
[0]);
8556 operands
[1] = gen_lowpart (DImode
, operands
[1]);
8557 emit_move_insn (operands
[0], operands
[1]);
8561 /* The only non-offsettable memory we handle is push. */
8562 if (push_operand (operands
[0], VOIDmode
))
8564 else if (GET_CODE (operands
[0]) == MEM
8565 && ! offsettable_memref_p (operands
[0]))
8568 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
8569 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
8571 /* When emitting push, take care for source operands on the stack. */
8572 if (push
&& GET_CODE (operands
[1]) == MEM
8573 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
8576 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
8577 XEXP (part
[1][2], 0));
8578 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
8579 XEXP (part
[1][1], 0));
8582 /* We need to do copy in the right order in case an address register
8583 of the source overlaps the destination. */
8584 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
8586 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
8588 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
8591 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
8594 /* Collision in the middle part can be handled by reordering. */
8595 if (collisions
== 1 && nparts
== 3
8596 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
8599 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
8600 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
8603 /* If there are more collisions, we can't handle it by reordering.
8604 Do an lea to the last part and use only one colliding move. */
8605 else if (collisions
> 1)
8608 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
8609 XEXP (part
[1][0], 0)));
8610 part
[1][0] = change_address (part
[1][0],
8611 TARGET_64BIT
? DImode
: SImode
,
8612 part
[0][nparts
- 1]);
8613 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
8615 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
8625 /* We use only first 12 bytes of TFmode value, but for pushing we
8626 are required to adjust stack as if we were pushing real 16byte
8628 if (mode
== TFmode
&& !TARGET_64BIT
)
8629 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
8631 emit_move_insn (part
[0][2], part
[1][2]);
8636 /* In 64bit mode we don't have 32bit push available. In case this is
8637 register, it is OK - we will just use larger counterpart. We also
8638 retype memory - these comes from attempt to avoid REX prefix on
8639 moving of second half of TFmode value. */
8640 if (GET_MODE (part
[1][1]) == SImode
)
8642 if (GET_CODE (part
[1][1]) == MEM
)
8643 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
8644 else if (REG_P (part
[1][1]))
8645 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
8648 if (GET_MODE (part
[1][0]) == SImode
)
8649 part
[1][0] = part
[1][1];
8652 emit_move_insn (part
[0][1], part
[1][1]);
8653 emit_move_insn (part
[0][0], part
[1][0]);
8657 /* Choose correct order to not overwrite the source before it is copied. */
8658 if ((REG_P (part
[0][0])
8659 && REG_P (part
[1][1])
8660 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
8662 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
8664 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
8668 operands
[2] = part
[0][2];
8669 operands
[3] = part
[0][1];
8670 operands
[4] = part
[0][0];
8671 operands
[5] = part
[1][2];
8672 operands
[6] = part
[1][1];
8673 operands
[7] = part
[1][0];
8677 operands
[2] = part
[0][1];
8678 operands
[3] = part
[0][0];
8679 operands
[5] = part
[1][1];
8680 operands
[6] = part
[1][0];
8687 operands
[2] = part
[0][0];
8688 operands
[3] = part
[0][1];
8689 operands
[4] = part
[0][2];
8690 operands
[5] = part
[1][0];
8691 operands
[6] = part
[1][1];
8692 operands
[7] = part
[1][2];
8696 operands
[2] = part
[0][0];
8697 operands
[3] = part
[0][1];
8698 operands
[5] = part
[1][0];
8699 operands
[6] = part
[1][1];
8702 emit_move_insn (operands
[2], operands
[5]);
8703 emit_move_insn (operands
[3], operands
[6]);
8705 emit_move_insn (operands
[4], operands
[7]);
8711 ix86_split_ashldi (operands
, scratch
)
8712 rtx
*operands
, scratch
;
8714 rtx low
[2], high
[2];
8717 if (GET_CODE (operands
[2]) == CONST_INT
)
8719 split_di (operands
, 2, low
, high
);
8720 count
= INTVAL (operands
[2]) & 63;
8724 emit_move_insn (high
[0], low
[1]);
8725 emit_move_insn (low
[0], const0_rtx
);
8728 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
8732 if (!rtx_equal_p (operands
[0], operands
[1]))
8733 emit_move_insn (operands
[0], operands
[1]);
8734 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
8735 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
8740 if (!rtx_equal_p (operands
[0], operands
[1]))
8741 emit_move_insn (operands
[0], operands
[1]);
8743 split_di (operands
, 1, low
, high
);
8745 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
8746 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
8748 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8750 if (! no_new_pseudos
)
8751 scratch
= force_reg (SImode
, const0_rtx
);
8753 emit_move_insn (scratch
, const0_rtx
);
8755 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
8759 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
8764 ix86_split_ashrdi (operands
, scratch
)
8765 rtx
*operands
, scratch
;
8767 rtx low
[2], high
[2];
8770 if (GET_CODE (operands
[2]) == CONST_INT
)
8772 split_di (operands
, 2, low
, high
);
8773 count
= INTVAL (operands
[2]) & 63;
8777 emit_move_insn (low
[0], high
[1]);
8779 if (! reload_completed
)
8780 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
8783 emit_move_insn (high
[0], low
[0]);
8784 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
8788 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
8792 if (!rtx_equal_p (operands
[0], operands
[1]))
8793 emit_move_insn (operands
[0], operands
[1]);
8794 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
8795 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
8800 if (!rtx_equal_p (operands
[0], operands
[1]))
8801 emit_move_insn (operands
[0], operands
[1]);
8803 split_di (operands
, 1, low
, high
);
8805 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
8806 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
8808 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8810 if (! no_new_pseudos
)
8811 scratch
= gen_reg_rtx (SImode
);
8812 emit_move_insn (scratch
, high
[0]);
8813 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
8814 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
8818 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
8823 ix86_split_lshrdi (operands
, scratch
)
8824 rtx
*operands
, scratch
;
8826 rtx low
[2], high
[2];
8829 if (GET_CODE (operands
[2]) == CONST_INT
)
8831 split_di (operands
, 2, low
, high
);
8832 count
= INTVAL (operands
[2]) & 63;
8836 emit_move_insn (low
[0], high
[1]);
8837 emit_move_insn (high
[0], const0_rtx
);
8840 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
8844 if (!rtx_equal_p (operands
[0], operands
[1]))
8845 emit_move_insn (operands
[0], operands
[1]);
8846 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
8847 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
8852 if (!rtx_equal_p (operands
[0], operands
[1]))
8853 emit_move_insn (operands
[0], operands
[1]);
8855 split_di (operands
, 1, low
, high
);
8857 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
8858 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
8860 /* Heh. By reversing the arguments, we can reuse this pattern. */
8861 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
8863 if (! no_new_pseudos
)
8864 scratch
= force_reg (SImode
, const0_rtx
);
8866 emit_move_insn (scratch
, const0_rtx
);
8868 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
8872 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
8876 /* Helper function for the string operations below. Dest VARIABLE whether
8877 it is aligned to VALUE bytes. If true, jump to the label. */
8879 ix86_expand_aligntest (variable
, value
)
8883 rtx label
= gen_label_rtx ();
8884 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
8885 if (GET_MODE (variable
) == DImode
)
8886 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
8888 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
8889 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
8894 /* Adjust COUNTER by the VALUE. */
8896 ix86_adjust_counter (countreg
, value
)
8898 HOST_WIDE_INT value
;
8900 if (GET_MODE (countreg
) == DImode
)
8901 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
8903 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
8906 /* Zero extend possibly SImode EXP to Pmode register. */
8908 ix86_zero_extend_to_Pmode (exp
)
8912 if (GET_MODE (exp
) == VOIDmode
)
8913 return force_reg (Pmode
, exp
);
8914 if (GET_MODE (exp
) == Pmode
)
8915 return copy_to_mode_reg (Pmode
, exp
);
8916 r
= gen_reg_rtx (Pmode
);
8917 emit_insn (gen_zero_extendsidi2 (r
, exp
));
8921 /* Expand string move (memcpy) operation. Use i386 string operations when
8922 profitable. expand_clrstr contains similar code. */
8924 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
8925 rtx dst
, src
, count_exp
, align_exp
;
8927 rtx srcreg
, destreg
, countreg
;
8928 enum machine_mode counter_mode
;
8929 HOST_WIDE_INT align
= 0;
8930 unsigned HOST_WIDE_INT count
= 0;
8935 if (GET_CODE (align_exp
) == CONST_INT
)
8936 align
= INTVAL (align_exp
);
8938 /* This simple hack avoids all inlining code and simplifies code bellow. */
8939 if (!TARGET_ALIGN_STRINGOPS
)
8942 if (GET_CODE (count_exp
) == CONST_INT
)
8943 count
= INTVAL (count_exp
);
8945 /* Figure out proper mode for counter. For 32bits it is always SImode,
8946 for 64bits use SImode when possible, otherwise DImode.
8947 Set count to number of bytes copied when known at compile time. */
8948 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
8949 || x86_64_zero_extended_value (count_exp
))
8950 counter_mode
= SImode
;
8952 counter_mode
= DImode
;
8954 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
8957 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
8958 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
8960 emit_insn (gen_cld ());
8962 /* When optimizing for size emit simple rep ; movsb instruction for
8963 counts not divisible by 4. */
8965 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
8967 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
8969 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
8970 destreg
, srcreg
, countreg
));
8972 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
8973 destreg
, srcreg
, countreg
));
8976 /* For constant aligned (or small unaligned) copies use rep movsl
8977 followed by code copying the rest. For PentiumPro ensure 8 byte
8978 alignment to allow rep movsl acceleration. */
8982 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
8983 || optimize_size
|| count
< (unsigned int)64))
8985 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
8986 if (count
& ~(size
- 1))
8988 countreg
= copy_to_mode_reg (counter_mode
,
8989 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
8990 & (TARGET_64BIT
? -1 : 0x3fffffff)));
8991 countreg
= ix86_zero_extend_to_Pmode (countreg
);
8995 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
8996 destreg
, srcreg
, countreg
));
8998 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
8999 destreg
, srcreg
, countreg
));
9002 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
9003 destreg
, srcreg
, countreg
));
9005 if (size
== 8 && (count
& 0x04))
9006 emit_insn (gen_strmovsi (destreg
, srcreg
));
9008 emit_insn (gen_strmovhi (destreg
, srcreg
));
9010 emit_insn (gen_strmovqi (destreg
, srcreg
));
9012 /* The generic code based on the glibc implementation:
9013 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9014 allowing accelerated copying there)
9015 - copy the data using rep movsl
9022 /* In case we don't know anything about the alignment, default to
9023 library version, since it is usually equally fast and result in
9025 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
9031 if (TARGET_SINGLE_STRINGOP
)
9032 emit_insn (gen_cld ());
9034 countreg2
= gen_reg_rtx (Pmode
);
9035 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
9037 /* We don't use loops to align destination and to copy parts smaller
9038 than 4 bytes, because gcc is able to optimize such code better (in
9039 the case the destination or the count really is aligned, gcc is often
9040 able to predict the branches) and also it is friendlier to the
9041 hardware branch prediction.
9043 Using loops is benefical for generic case, because we can
9044 handle small counts using the loops. Many CPUs (such as Athlon)
9045 have large REP prefix setup costs.
9047 This is quite costy. Maybe we can revisit this decision later or
9048 add some customizability to this code. */
9051 && align
< (TARGET_PENTIUMPRO
&& (count
== 0
9052 || count
>= (unsigned int)260)
9053 ? 8 : UNITS_PER_WORD
))
9055 label
= gen_label_rtx ();
9056 emit_cmp_and_jump_insns (countreg
, GEN_INT (UNITS_PER_WORD
- 1),
9057 LEU
, 0, counter_mode
, 1, 0, label
);
9061 rtx label
= ix86_expand_aligntest (destreg
, 1);
9062 emit_insn (gen_strmovqi (destreg
, srcreg
));
9063 ix86_adjust_counter (countreg
, 1);
9065 LABEL_NUSES (label
) = 1;
9069 rtx label
= ix86_expand_aligntest (destreg
, 2);
9070 emit_insn (gen_strmovhi (destreg
, srcreg
));
9071 ix86_adjust_counter (countreg
, 2);
9073 LABEL_NUSES (label
) = 1;
9076 && ((TARGET_PENTIUMPRO
&& (count
== 0
9077 || count
>= (unsigned int)260))
9080 rtx label
= ix86_expand_aligntest (destreg
, 4);
9081 emit_insn (gen_strmovsi (destreg
, srcreg
));
9082 ix86_adjust_counter (countreg
, 4);
9084 LABEL_NUSES (label
) = 1;
9087 if (!TARGET_SINGLE_STRINGOP
)
9088 emit_insn (gen_cld ());
9091 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
9093 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
9094 destreg
, srcreg
, countreg2
));
9098 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
9099 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
9100 destreg
, srcreg
, countreg2
));
9106 LABEL_NUSES (label
) = 1;
9108 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
9109 emit_insn (gen_strmovsi (destreg
, srcreg
));
9110 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
9112 rtx label
= ix86_expand_aligntest (countreg
, 4);
9113 emit_insn (gen_strmovsi (destreg
, srcreg
));
9115 LABEL_NUSES (label
) = 1;
9117 if (align
> 2 && count
!= 0 && (count
& 2))
9118 emit_insn (gen_strmovhi (destreg
, srcreg
));
9119 if (align
<= 2 || count
== 0)
9121 rtx label
= ix86_expand_aligntest (countreg
, 2);
9122 emit_insn (gen_strmovhi (destreg
, srcreg
));
9124 LABEL_NUSES (label
) = 1;
9126 if (align
> 1 && count
!= 0 && (count
& 1))
9127 emit_insn (gen_strmovqi (destreg
, srcreg
));
9128 if (align
<= 1 || count
== 0)
9130 rtx label
= ix86_expand_aligntest (countreg
, 1);
9131 emit_insn (gen_strmovqi (destreg
, srcreg
));
9133 LABEL_NUSES (label
) = 1;
9137 insns
= get_insns ();
9140 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
9145 /* Expand string clear operation (bzero). Use i386 string operations when
9146 profitable. expand_movstr contains similar code. */
9148 ix86_expand_clrstr (src
, count_exp
, align_exp
)
9149 rtx src
, count_exp
, align_exp
;
9151 rtx destreg
, zeroreg
, countreg
;
9152 enum machine_mode counter_mode
;
9153 HOST_WIDE_INT align
= 0;
9154 unsigned HOST_WIDE_INT count
= 0;
9156 if (GET_CODE (align_exp
) == CONST_INT
)
9157 align
= INTVAL (align_exp
);
9159 /* This simple hack avoids all inlining code and simplifies code bellow. */
9160 if (!TARGET_ALIGN_STRINGOPS
)
9163 if (GET_CODE (count_exp
) == CONST_INT
)
9164 count
= INTVAL (count_exp
);
9165 /* Figure out proper mode for counter. For 32bits it is always SImode,
9166 for 64bits use SImode when possible, otherwise DImode.
9167 Set count to number of bytes copied when known at compile time. */
9168 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
9169 || x86_64_zero_extended_value (count_exp
))
9170 counter_mode
= SImode
;
9172 counter_mode
= DImode
;
9174 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9176 emit_insn (gen_cld ());
9178 /* When optimizing for size emit simple rep ; movsb instruction for
9179 counts not divisible by 4. */
9181 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
9183 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
9184 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
9186 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
9187 destreg
, countreg
));
9189 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
9190 destreg
, countreg
));
9194 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
9195 || optimize_size
|| count
< (unsigned int)64))
9197 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
9198 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
9199 if (count
& ~(size
- 1))
9201 countreg
= copy_to_mode_reg (counter_mode
,
9202 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
9203 & (TARGET_64BIT
? -1 : 0x3fffffff)));
9204 countreg
= ix86_zero_extend_to_Pmode (countreg
);
9208 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
9209 destreg
, countreg
));
9211 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
9212 destreg
, countreg
));
9215 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
9216 destreg
, countreg
));
9218 if (size
== 8 && (count
& 0x04))
9219 emit_insn (gen_strsetsi (destreg
,
9220 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9222 emit_insn (gen_strsethi (destreg
,
9223 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9225 emit_insn (gen_strsetqi (destreg
,
9226 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9233 /* In case we don't know anything about the alignment, default to
9234 library version, since it is usually equally fast and result in
9236 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
9239 if (TARGET_SINGLE_STRINGOP
)
9240 emit_insn (gen_cld ());
9242 countreg2
= gen_reg_rtx (Pmode
);
9243 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
9244 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
9247 && align
< (TARGET_PENTIUMPRO
&& (count
== 0
9248 || count
>= (unsigned int)260)
9249 ? 8 : UNITS_PER_WORD
))
9251 label
= gen_label_rtx ();
9252 emit_cmp_and_jump_insns (countreg
, GEN_INT (UNITS_PER_WORD
- 1),
9253 LEU
, 0, counter_mode
, 1, 0, label
);
9257 rtx label
= ix86_expand_aligntest (destreg
, 1);
9258 emit_insn (gen_strsetqi (destreg
,
9259 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9260 ix86_adjust_counter (countreg
, 1);
9262 LABEL_NUSES (label
) = 1;
9266 rtx label
= ix86_expand_aligntest (destreg
, 2);
9267 emit_insn (gen_strsethi (destreg
,
9268 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9269 ix86_adjust_counter (countreg
, 2);
9271 LABEL_NUSES (label
) = 1;
9273 if (align
<= 4 && TARGET_PENTIUMPRO
&& (count
== 0
9274 || count
>= (unsigned int)260))
9276 rtx label
= ix86_expand_aligntest (destreg
, 4);
9277 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
9278 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
9280 ix86_adjust_counter (countreg
, 4);
9282 LABEL_NUSES (label
) = 1;
9285 if (!TARGET_SINGLE_STRINGOP
)
9286 emit_insn (gen_cld ());
9289 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
9291 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
9292 destreg
, countreg2
));
9296 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
9297 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
9298 destreg
, countreg2
));
9304 LABEL_NUSES (label
) = 1;
9306 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
9307 emit_insn (gen_strsetsi (destreg
,
9308 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9309 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
9311 rtx label
= ix86_expand_aligntest (destreg
, 2);
9312 emit_insn (gen_strsetsi (destreg
,
9313 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9315 LABEL_NUSES (label
) = 1;
9317 if (align
> 2 && count
!= 0 && (count
& 2))
9318 emit_insn (gen_strsethi (destreg
,
9319 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9320 if (align
<= 2 || count
== 0)
9322 rtx label
= ix86_expand_aligntest (destreg
, 2);
9323 emit_insn (gen_strsethi (destreg
,
9324 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9326 LABEL_NUSES (label
) = 1;
9328 if (align
> 1 && count
!= 0 && (count
& 1))
9329 emit_insn (gen_strsetqi (destreg
,
9330 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9331 if (align
<= 1 || count
== 0)
9333 rtx label
= ix86_expand_aligntest (destreg
, 1);
9334 emit_insn (gen_strsetqi (destreg
,
9335 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9337 LABEL_NUSES (label
) = 1;
9342 /* Expand strlen. */
9344 ix86_expand_strlen (out
, src
, eoschar
, align
)
9345 rtx out
, src
, eoschar
, align
;
9347 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
9349 /* The generic case of strlen expander is long. Avoid it's
9350 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
9352 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
9353 && !TARGET_INLINE_ALL_STRINGOPS
9355 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
9358 addr
= force_reg (Pmode
, XEXP (src
, 0));
9359 scratch1
= gen_reg_rtx (Pmode
);
9361 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
9364 /* Well it seems that some optimizer does not combine a call like
9365 foo(strlen(bar), strlen(bar));
9366 when the move and the subtraction is done here. It does calculate
9367 the length just once when these instructions are done inside of
9368 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9369 often used and I use one fewer register for the lifetime of
9370 output_strlen_unroll() this is better. */
9372 emit_move_insn (out
, addr
);
9374 ix86_expand_strlensi_unroll_1 (out
, align
);
9376 /* strlensi_unroll_1 returns the address of the zero at the end of
9377 the string, like memchr(), so compute the length by subtracting
9378 the start address. */
9380 emit_insn (gen_subdi3 (out
, out
, addr
));
9382 emit_insn (gen_subsi3 (out
, out
, addr
));
9386 scratch2
= gen_reg_rtx (Pmode
);
9387 scratch3
= gen_reg_rtx (Pmode
);
9388 scratch4
= force_reg (Pmode
, constm1_rtx
);
9390 emit_move_insn (scratch3
, addr
);
9391 eoschar
= force_reg (QImode
, eoschar
);
9393 emit_insn (gen_cld ());
9396 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
9397 align
, scratch4
, scratch3
));
9398 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
9399 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
9403 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
9404 align
, scratch4
, scratch3
));
9405 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
9406 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
9412 /* Expand the appropriate insns for doing strlen if not just doing
9415 out = result, initialized with the start address
9416 align_rtx = alignment of the address.
9417 scratch = scratch register, initialized with the startaddress when
9418 not aligned, otherwise undefined
9420 This is just the body. It needs the initialisations mentioned above and
9421 some address computing at the end. These things are done in i386.md. */
9424 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
9429 rtx align_2_label
= NULL_RTX
;
9430 rtx align_3_label
= NULL_RTX
;
9431 rtx align_4_label
= gen_label_rtx ();
9432 rtx end_0_label
= gen_label_rtx ();
9434 rtx tmpreg
= gen_reg_rtx (SImode
);
9435 rtx scratch
= gen_reg_rtx (SImode
);
9438 if (GET_CODE (align_rtx
) == CONST_INT
)
9439 align
= INTVAL (align_rtx
);
9441 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9443 /* Is there a known alignment and is it less than 4? */
9446 rtx scratch1
= gen_reg_rtx (Pmode
);
9447 emit_move_insn (scratch1
, out
);
9448 /* Is there a known alignment and is it not 2? */
9451 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
9452 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
9454 /* Leave just the 3 lower bits. */
9455 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
9456 NULL_RTX
, 0, OPTAB_WIDEN
);
9458 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
9459 Pmode
, 1, 0, align_4_label
);
9460 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
9461 Pmode
, 1, 0, align_2_label
);
9462 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
9463 Pmode
, 1, 0, align_3_label
);
9467 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9468 check if is aligned to 4 - byte. */
9470 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
9471 NULL_RTX
, 0, OPTAB_WIDEN
);
9473 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
9474 Pmode
, 1, 0, align_4_label
);
9477 mem
= gen_rtx_MEM (QImode
, out
);
9479 /* Now compare the bytes. */
9481 /* Compare the first n unaligned byte on a byte per byte basis. */
9482 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
9483 QImode
, 1, 0, end_0_label
);
9485 /* Increment the address. */
9487 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9489 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9491 /* Not needed with an alignment of 2 */
9494 emit_label (align_2_label
);
9496 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
9497 QImode
, 1, 0, end_0_label
);
9500 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9502 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9504 emit_label (align_3_label
);
9507 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
9508 QImode
, 1, 0, end_0_label
);
9511 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9513 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9516 /* Generate loop to check 4 bytes at a time. It is not a good idea to
9517 align this loop. It gives only huge programs, but does not help to
9519 emit_label (align_4_label
);
9521 mem
= gen_rtx_MEM (SImode
, out
);
9522 emit_move_insn (scratch
, mem
);
9524 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
9526 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
9528 /* This formula yields a nonzero result iff one of the bytes is zero.
9529 This saves three branches inside loop and many cycles. */
9531 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
9532 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
9533 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
9534 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
9535 GEN_INT (trunc_int_for_mode
9536 (0x80808080, SImode
))));
9537 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0,
9538 SImode
, 1, 0, align_4_label
);
9542 rtx reg
= gen_reg_rtx (SImode
);
9543 rtx reg2
= gen_reg_rtx (Pmode
);
9544 emit_move_insn (reg
, tmpreg
);
9545 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
9547 /* If zero is not in the first two bytes, move two bytes forward. */
9548 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
9549 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9550 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
9551 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
9552 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
9555 /* Emit lea manually to avoid clobbering of flags. */
9556 emit_insn (gen_rtx_SET (SImode
, reg2
,
9557 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
9559 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9560 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
9561 emit_insn (gen_rtx_SET (VOIDmode
, out
,
9562 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
9569 rtx end_2_label
= gen_label_rtx ();
9570 /* Is zero in the first two bytes? */
9572 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
9573 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9574 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
9575 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9576 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
9578 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9579 JUMP_LABEL (tmp
) = end_2_label
;
9581 /* Not in the first two. Move two bytes forward. */
9582 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
9584 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
9586 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
9588 emit_label (end_2_label
);
9592 /* Avoid branch in fixing the byte. */
9593 tmpreg
= gen_lowpart (QImode
, tmpreg
);
9594 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
9596 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
9598 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
9600 emit_label (end_0_label
);
9603 /* Clear stack slot assignments remembered from previous functions.
9604 This is called from INIT_EXPANDERS once before RTL is emitted for each
9608 ix86_init_machine_status (p
)
9611 p
->machine
= (struct machine_function
*)
9612 xcalloc (1, sizeof (struct machine_function
));
9615 /* Mark machine specific bits of P for GC. */
9617 ix86_mark_machine_status (p
)
9620 struct machine_function
*machine
= p
->machine
;
9621 enum machine_mode mode
;
9627 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
9628 mode
= (enum machine_mode
) ((int) mode
+ 1))
9629 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
9630 ggc_mark_rtx (machine
->stack_locals
[(int) mode
][n
]);
9634 ix86_free_machine_status (p
)
9641 /* Return a MEM corresponding to a stack slot with mode MODE.
9642 Allocate a new slot if necessary.
9644 The RTL for a function can have several slots available: N is
9645 which slot to use. */
9648 assign_386_stack_local (mode
, n
)
9649 enum machine_mode mode
;
9652 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
9655 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
9656 ix86_stack_locals
[(int) mode
][n
]
9657 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
9659 return ix86_stack_locals
[(int) mode
][n
];
9662 /* Calculate the length of the memory address in the instruction
9663 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9666 memory_address_length (addr
)
9669 struct ix86_address parts
;
9670 rtx base
, index
, disp
;
9673 if (GET_CODE (addr
) == PRE_DEC
9674 || GET_CODE (addr
) == POST_INC
9675 || GET_CODE (addr
) == PRE_MODIFY
9676 || GET_CODE (addr
) == POST_MODIFY
)
9679 if (! ix86_decompose_address (addr
, &parts
))
9683 index
= parts
.index
;
9687 /* Register Indirect. */
9688 if (base
&& !index
&& !disp
)
9690 /* Special cases: ebp and esp need the two-byte modrm form. */
9691 if (addr
== stack_pointer_rtx
9692 || addr
== arg_pointer_rtx
9693 || addr
== frame_pointer_rtx
9694 || addr
== hard_frame_pointer_rtx
)
9698 /* Direct Addressing. */
9699 else if (disp
&& !base
&& !index
)
9704 /* Find the length of the displacement constant. */
9707 if (GET_CODE (disp
) == CONST_INT
9708 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
9714 /* An index requires the two-byte modrm form. */
9722 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
9723 expect that insn have 8bit immediate alternative. */
9725 ix86_attr_length_immediate_default (insn
, shortform
)
9731 extract_insn_cached (insn
);
9732 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
9733 if (CONSTANT_P (recog_data
.operand
[i
]))
9738 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
9739 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
9743 switch (get_attr_mode (insn
))
9754 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
9759 fatal_insn ("Unknown insn mode", insn
);
9765 /* Compute default value for "length_address" attribute. */
9767 ix86_attr_length_address_default (insn
)
9771 extract_insn_cached (insn
);
9772 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
9773 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
9775 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
9781 /* Return the maximum number of instructions a cpu can issue. */
9788 case PROCESSOR_PENTIUM
:
9792 case PROCESSOR_PENTIUMPRO
:
9793 case PROCESSOR_PENTIUM4
:
9794 case PROCESSOR_ATHLON
:
9802 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9803 by DEP_INSN and nothing set by DEP_INSN. */
9806 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
9808 enum attr_type insn_type
;
9812 /* Simplify the test for uninteresting insns. */
9813 if (insn_type
!= TYPE_SETCC
9814 && insn_type
!= TYPE_ICMOV
9815 && insn_type
!= TYPE_FCMOV
9816 && insn_type
!= TYPE_IBR
)
9819 if ((set
= single_set (dep_insn
)) != 0)
9821 set
= SET_DEST (set
);
9824 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
9825 && XVECLEN (PATTERN (dep_insn
), 0) == 2
9826 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
9827 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
9829 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
9830 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
9835 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
9838 /* This test is true if the dependant insn reads the flags but
9839 not any other potentially set register. */
9840 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
9843 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
9849 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9850 address with operands set by DEP_INSN. */
9853 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
9855 enum attr_type insn_type
;
9859 if (insn_type
== TYPE_LEA
9862 addr
= PATTERN (insn
);
9863 if (GET_CODE (addr
) == SET
)
9865 else if (GET_CODE (addr
) == PARALLEL
9866 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
9867 addr
= XVECEXP (addr
, 0, 0);
9870 addr
= SET_SRC (addr
);
9875 extract_insn_cached (insn
);
9876 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
9877 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
9879 addr
= XEXP (recog_data
.operand
[i
], 0);
9886 return modified_in_p (addr
, dep_insn
);
9890 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
9891 rtx insn
, link
, dep_insn
;
9894 enum attr_type insn_type
, dep_insn_type
;
9895 enum attr_memory memory
, dep_memory
;
9897 int dep_insn_code_number
;
9899 /* Anti and output depenancies have zero cost on all CPUs. */
9900 if (REG_NOTE_KIND (link
) != 0)
9903 dep_insn_code_number
= recog_memoized (dep_insn
);
9905 /* If we can't recognize the insns, we can't really do anything. */
9906 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
9909 insn_type
= get_attr_type (insn
);
9910 dep_insn_type
= get_attr_type (dep_insn
);
9914 case PROCESSOR_PENTIUM
:
9915 /* Address Generation Interlock adds a cycle of latency. */
9916 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
9919 /* ??? Compares pair with jump/setcc. */
9920 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
9923 /* Floating point stores require value to be ready one cycle ealier. */
9924 if (insn_type
== TYPE_FMOV
9925 && get_attr_memory (insn
) == MEMORY_STORE
9926 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
9930 case PROCESSOR_PENTIUMPRO
:
9931 memory
= get_attr_memory (insn
);
9932 dep_memory
= get_attr_memory (dep_insn
);
9934 /* Since we can't represent delayed latencies of load+operation,
9935 increase the cost here for non-imov insns. */
9936 if (dep_insn_type
!= TYPE_IMOV
9937 && dep_insn_type
!= TYPE_FMOV
9938 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
9941 /* INT->FP conversion is expensive. */
9942 if (get_attr_fp_int_src (dep_insn
))
9945 /* There is one cycle extra latency between an FP op and a store. */
9946 if (insn_type
== TYPE_FMOV
9947 && (set
= single_set (dep_insn
)) != NULL_RTX
9948 && (set2
= single_set (insn
)) != NULL_RTX
9949 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
9950 && GET_CODE (SET_DEST (set2
)) == MEM
)
9953 /* Show ability of reorder buffer to hide latency of load by executing
9954 in parallel with previous instruction in case
9955 previous instruction is not needed to compute the address. */
9956 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
9957 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
9959 /* Claim moves to take one cycle, as core can issue one load
9960 at time and the next load can start cycle later. */
9961 if (dep_insn_type
== TYPE_IMOV
9962 || dep_insn_type
== TYPE_FMOV
)
9970 memory
= get_attr_memory (insn
);
9971 dep_memory
= get_attr_memory (dep_insn
);
9972 /* The esp dependency is resolved before the instruction is really
9974 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
9975 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
9978 /* Since we can't represent delayed latencies of load+operation,
9979 increase the cost here for non-imov insns. */
9980 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
9981 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
9983 /* INT->FP conversion is expensive. */
9984 if (get_attr_fp_int_src (dep_insn
))
9987 /* Show ability of reorder buffer to hide latency of load by executing
9988 in parallel with previous instruction in case
9989 previous instruction is not needed to compute the address. */
9990 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
9991 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
9993 /* Claim moves to take one cycle, as core can issue one load
9994 at time and the next load can start cycle later. */
9995 if (dep_insn_type
== TYPE_IMOV
9996 || dep_insn_type
== TYPE_FMOV
)
10005 case PROCESSOR_ATHLON
:
10006 memory
= get_attr_memory (insn
);
10007 dep_memory
= get_attr_memory (dep_insn
);
10009 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
10011 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
10016 /* Show ability of reorder buffer to hide latency of load by executing
10017 in parallel with previous instruction in case
10018 previous instruction is not needed to compute the address. */
10019 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10020 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10022 /* Claim moves to take one cycle, as core can issue one load
10023 at time and the next load can start cycle later. */
10024 if (dep_insn_type
== TYPE_IMOV
10025 || dep_insn_type
== TYPE_FMOV
)
10027 else if (cost
>= 3)
10042 struct ppro_sched_data
10045 int issued_this_cycle
;
10050 ix86_safe_length (insn
)
10053 if (recog_memoized (insn
) >= 0)
10054 return get_attr_length(insn
);
10060 ix86_safe_length_prefix (insn
)
10063 if (recog_memoized (insn
) >= 0)
10064 return get_attr_length(insn
);
10069 static enum attr_memory
10070 ix86_safe_memory (insn
)
10073 if (recog_memoized (insn
) >= 0)
10074 return get_attr_memory(insn
);
10076 return MEMORY_UNKNOWN
;
10079 static enum attr_pent_pair
10080 ix86_safe_pent_pair (insn
)
10083 if (recog_memoized (insn
) >= 0)
10084 return get_attr_pent_pair(insn
);
10086 return PENT_PAIR_NP
;
10089 static enum attr_ppro_uops
10090 ix86_safe_ppro_uops (insn
)
10093 if (recog_memoized (insn
) >= 0)
10094 return get_attr_ppro_uops (insn
);
10096 return PPRO_UOPS_MANY
;
10100 ix86_dump_ppro_packet (dump
)
10103 if (ix86_sched_data
.ppro
.decode
[0])
10105 fprintf (dump
, "PPRO packet: %d",
10106 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
10107 if (ix86_sched_data
.ppro
.decode
[1])
10108 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
10109 if (ix86_sched_data
.ppro
.decode
[2])
10110 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
10111 fputc ('\n', dump
);
10115 /* We're beginning a new block. Initialize data structures as necessary. */
10118 ix86_sched_init (dump
, sched_verbose
, veclen
)
10119 FILE *dump ATTRIBUTE_UNUSED
;
10120 int sched_verbose ATTRIBUTE_UNUSED
;
10121 int veclen ATTRIBUTE_UNUSED
;
10123 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
10126 /* Shift INSN to SLOT, and shift everything else down. */
10129 ix86_reorder_insn (insnp
, slot
)
10136 insnp
[0] = insnp
[1];
10137 while (++insnp
!= slot
);
10142 /* Find an instruction with given pairability and minimal amount of cycles
10143 lost by the fact that the CPU waits for both pipelines to finish before
10144 reading next instructions. Also take care that both instructions together
10145 can not exceed 7 bytes. */
10148 ix86_pent_find_pair (e_ready
, ready
, type
, first
)
10151 enum attr_pent_pair type
;
10154 int mincycles
, cycles
;
10155 enum attr_pent_pair tmp
;
10156 enum attr_memory memory
;
10157 rtx
*insnp
, *bestinsnp
= NULL
;
10159 if (ix86_safe_length (first
) > 7 + ix86_safe_length_prefix (first
))
10162 memory
= ix86_safe_memory (first
);
10163 cycles
= result_ready_cost (first
);
10164 mincycles
= INT_MAX
;
10166 for (insnp
= e_ready
; insnp
>= ready
&& mincycles
; --insnp
)
10167 if ((tmp
= ix86_safe_pent_pair (*insnp
)) == type
10168 && ix86_safe_length (*insnp
) <= 7 + ix86_safe_length_prefix (*insnp
))
10170 enum attr_memory second_memory
;
10171 int secondcycles
, currentcycles
;
10173 second_memory
= ix86_safe_memory (*insnp
);
10174 secondcycles
= result_ready_cost (*insnp
);
10175 currentcycles
= abs (cycles
- secondcycles
);
10177 if (secondcycles
>= 1 && cycles
>= 1)
10179 /* Two read/modify/write instructions together takes two
10181 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_BOTH
)
10182 currentcycles
+= 2;
10184 /* Read modify/write instruction followed by read/modify
10185 takes one cycle longer. */
10186 if (memory
== MEMORY_BOTH
&& second_memory
== MEMORY_LOAD
10187 && tmp
!= PENT_PAIR_UV
10188 && ix86_safe_pent_pair (first
) != PENT_PAIR_UV
)
10189 currentcycles
+= 1;
10191 if (currentcycles
< mincycles
)
10192 bestinsnp
= insnp
, mincycles
= currentcycles
;
10198 /* Subroutines of ix86_sched_reorder. */
10201 ix86_sched_reorder_pentium (ready
, e_ready
)
10205 enum attr_pent_pair pair1
, pair2
;
10208 /* This wouldn't be necessary if Haifa knew that static insn ordering
10209 is important to which pipe an insn is issued to. So we have to make
10210 some minor rearrangements. */
10212 pair1
= ix86_safe_pent_pair (*e_ready
);
10214 /* If the first insn is non-pairable, let it be. */
10215 if (pair1
== PENT_PAIR_NP
)
10218 pair2
= PENT_PAIR_NP
;
10221 /* If the first insn is UV or PV pairable, search for a PU
10222 insn to go with. */
10223 if (pair1
== PENT_PAIR_UV
|| pair1
== PENT_PAIR_PV
)
10225 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
10226 PENT_PAIR_PU
, *e_ready
);
10228 pair2
= PENT_PAIR_PU
;
10231 /* If the first insn is PU or UV pairable, search for a PV
10232 insn to go with. */
10233 if (pair2
== PENT_PAIR_NP
10234 && (pair1
== PENT_PAIR_PU
|| pair1
== PENT_PAIR_UV
))
10236 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
10237 PENT_PAIR_PV
, *e_ready
);
10239 pair2
= PENT_PAIR_PV
;
10242 /* If the first insn is pairable, search for a UV
10243 insn to go with. */
10244 if (pair2
== PENT_PAIR_NP
)
10246 insnp
= ix86_pent_find_pair (e_ready
-1, ready
,
10247 PENT_PAIR_UV
, *e_ready
);
10249 pair2
= PENT_PAIR_UV
;
10252 if (pair2
== PENT_PAIR_NP
)
10255 /* Found something! Decide if we need to swap the order. */
10256 if (pair1
== PENT_PAIR_PV
|| pair2
== PENT_PAIR_PU
10257 || (pair1
== PENT_PAIR_UV
&& pair2
== PENT_PAIR_UV
10258 && ix86_safe_memory (*e_ready
) == MEMORY_BOTH
10259 && ix86_safe_memory (*insnp
) == MEMORY_LOAD
))
10260 ix86_reorder_insn (insnp
, e_ready
);
10262 ix86_reorder_insn (insnp
, e_ready
- 1);
10266 ix86_sched_reorder_ppro (ready
, e_ready
)
10271 enum attr_ppro_uops cur_uops
;
10272 int issued_this_cycle
;
10276 /* At this point .ppro.decode contains the state of the three
10277 decoders from last "cycle". That is, those insns that were
10278 actually independent. But here we're scheduling for the
10279 decoder, and we may find things that are decodable in the
10282 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
10283 issued_this_cycle
= 0;
10286 cur_uops
= ix86_safe_ppro_uops (*insnp
);
10288 /* If the decoders are empty, and we've a complex insn at the
10289 head of the priority queue, let it issue without complaint. */
10290 if (decode
[0] == NULL
)
10292 if (cur_uops
== PPRO_UOPS_MANY
)
10294 decode
[0] = *insnp
;
10298 /* Otherwise, search for a 2-4 uop unsn to issue. */
10299 while (cur_uops
!= PPRO_UOPS_FEW
)
10301 if (insnp
== ready
)
10303 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
10306 /* If so, move it to the head of the line. */
10307 if (cur_uops
== PPRO_UOPS_FEW
)
10308 ix86_reorder_insn (insnp
, e_ready
);
10310 /* Issue the head of the queue. */
10311 issued_this_cycle
= 1;
10312 decode
[0] = *e_ready
--;
10315 /* Look for simple insns to fill in the other two slots. */
10316 for (i
= 1; i
< 3; ++i
)
10317 if (decode
[i
] == NULL
)
10319 if (ready
>= e_ready
)
10323 cur_uops
= ix86_safe_ppro_uops (*insnp
);
10324 while (cur_uops
!= PPRO_UOPS_ONE
)
10326 if (insnp
== ready
)
10328 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
10331 /* Found one. Move it to the head of the queue and issue it. */
10332 if (cur_uops
== PPRO_UOPS_ONE
)
10334 ix86_reorder_insn (insnp
, e_ready
);
10335 decode
[i
] = *e_ready
--;
10336 issued_this_cycle
++;
10340 /* ??? Didn't find one. Ideally, here we would do a lazy split
10341 of 2-uop insns, issue one and queue the other. */
10345 if (issued_this_cycle
== 0)
10346 issued_this_cycle
= 1;
10347 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
10350 /* We are about to being issuing insns for this clock cycle.
10351 Override the default sort algorithm to better slot instructions. */
10353 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
10354 FILE *dump ATTRIBUTE_UNUSED
;
10355 int sched_verbose ATTRIBUTE_UNUSED
;
10358 int clock_var ATTRIBUTE_UNUSED
;
10360 int n_ready
= *n_readyp
;
10361 rtx
*e_ready
= ready
+ n_ready
- 1;
10371 case PROCESSOR_PENTIUM
:
10372 ix86_sched_reorder_pentium (ready
, e_ready
);
10375 case PROCESSOR_PENTIUMPRO
:
10376 ix86_sched_reorder_ppro (ready
, e_ready
);
10381 return ix86_issue_rate ();
10384 /* We are about to issue INSN. Return the number of insns left on the
10385 ready queue that can be issued this cycle. */
10388 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
10392 int can_issue_more
;
10398 return can_issue_more
- 1;
10400 case PROCESSOR_PENTIUMPRO
:
10402 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
10404 if (uops
== PPRO_UOPS_MANY
)
10407 ix86_dump_ppro_packet (dump
);
10408 ix86_sched_data
.ppro
.decode
[0] = insn
;
10409 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10410 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10412 ix86_dump_ppro_packet (dump
);
10413 ix86_sched_data
.ppro
.decode
[0] = NULL
;
10415 else if (uops
== PPRO_UOPS_FEW
)
10418 ix86_dump_ppro_packet (dump
);
10419 ix86_sched_data
.ppro
.decode
[0] = insn
;
10420 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10421 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10425 for (i
= 0; i
< 3; ++i
)
10426 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
10428 ix86_sched_data
.ppro
.decode
[i
] = insn
;
10436 ix86_dump_ppro_packet (dump
);
10437 ix86_sched_data
.ppro
.decode
[0] = NULL
;
10438 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10439 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10443 return --ix86_sched_data
.ppro
.issued_this_cycle
;
10447 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10448 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10452 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
10454 rtx dstref
, srcref
, dstreg
, srcreg
;
10458 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
10460 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
10464 /* Subroutine of above to actually do the updating by recursively walking
10468 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
10470 rtx dstref
, srcref
, dstreg
, srcreg
;
10472 enum rtx_code code
= GET_CODE (x
);
10473 const char *format_ptr
= GET_RTX_FORMAT (code
);
10476 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
10477 MEM_COPY_ATTRIBUTES (x
, dstref
);
10478 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
10479 MEM_COPY_ATTRIBUTES (x
, srcref
);
10481 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
10483 if (*format_ptr
== 'e')
10484 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
10486 else if (*format_ptr
== 'E')
10487 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
10488 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
10493 /* Compute the alignment given to a constant that is being placed in memory.
10494 EXP is the constant and ALIGN is the alignment that the object would
10496 The value of this function is used instead of that alignment to align
10500 ix86_constant_alignment (exp
, align
)
10504 if (TREE_CODE (exp
) == REAL_CST
)
10506 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
10508 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
10511 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
10518 /* Compute the alignment for a static variable.
10519 TYPE is the data type, and ALIGN is the alignment that
10520 the object would ordinarily have. The value of this function is used
10521 instead of that alignment to align the object. */
10524 ix86_data_alignment (type
, align
)
10528 if (AGGREGATE_TYPE_P (type
)
10529 && TYPE_SIZE (type
)
10530 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10531 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
10532 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
10535 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10536 to 16byte boundary. */
10539 if (AGGREGATE_TYPE_P (type
)
10540 && TYPE_SIZE (type
)
10541 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10542 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
10543 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
10547 if (TREE_CODE (type
) == ARRAY_TYPE
)
10549 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
10551 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
10554 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
10557 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
10559 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
10562 else if ((TREE_CODE (type
) == RECORD_TYPE
10563 || TREE_CODE (type
) == UNION_TYPE
10564 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
10565 && TYPE_FIELDS (type
))
10567 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
10569 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
10572 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
10573 || TREE_CODE (type
) == INTEGER_TYPE
)
10575 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
10577 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
10584 /* Compute the alignment for a local variable.
10585 TYPE is the data type, and ALIGN is the alignment that
10586 the object would ordinarily have. The value of this macro is used
10587 instead of that alignment to align the object. */
10590 ix86_local_alignment (type
, align
)
10594 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10595 to 16byte boundary. */
10598 if (AGGREGATE_TYPE_P (type
)
10599 && TYPE_SIZE (type
)
10600 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10601 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
10602 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
10605 if (TREE_CODE (type
) == ARRAY_TYPE
)
10607 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
10609 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
10612 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
10614 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
10616 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
10619 else if ((TREE_CODE (type
) == RECORD_TYPE
10620 || TREE_CODE (type
) == UNION_TYPE
10621 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
10622 && TYPE_FIELDS (type
))
10624 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
10626 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
10629 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
10630 || TREE_CODE (type
) == INTEGER_TYPE
)
10633 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
10635 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
10641 /* Emit RTL insns to initialize the variable parts of a trampoline.
10642 FNADDR is an RTX for the address of the function's pure code.
10643 CXT is an RTX for the static chain value for the function. */
10645 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
10646 rtx tramp
, fnaddr
, cxt
;
10650 /* Compute offset from the end of the jmp to the target function. */
10651 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
10652 plus_constant (tramp
, 10),
10653 NULL_RTX
, 1, OPTAB_DIRECT
);
10654 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
10655 GEN_INT (trunc_int_for_mode (0xb9, QImode
)));
10656 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
10657 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
10658 GEN_INT (trunc_int_for_mode (0xe9, QImode
)));
10659 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
10664 /* Try to load address using shorter movl instead of movabs.
10665 We may want to support movq for kernel mode, but kernel does not use
10666 trampolines at the moment. */
10667 if (x86_64_zero_extended_value (fnaddr
))
10669 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
10670 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10671 GEN_INT (trunc_int_for_mode (0xbb41, HImode
)));
10672 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
10673 gen_lowpart (SImode
, fnaddr
));
10678 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10679 GEN_INT (trunc_int_for_mode (0xbb49, HImode
)));
10680 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
10684 /* Load static chain using movabs to r10. */
10685 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10686 GEN_INT (trunc_int_for_mode (0xba49, HImode
)));
10687 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
10690 /* Jump to the r11 */
10691 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10692 GEN_INT (trunc_int_for_mode (0xff49, HImode
)));
10693 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
10694 GEN_INT (trunc_int_for_mode (0xe3, HImode
)));
10696 if (offset
> TRAMPOLINE_SIZE
)
/* Register builtin NAME with function type TYPE and builtin code CODE,
   but only if the target flag bits in MASK are enabled.  Wrapped in
   do/while (0) so it behaves as a single statement.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)
10707 struct builtin_description
10710 enum insn_code icode
;
10712 enum ix86_builtins code
;
10713 enum rtx_code comparison
;
10717 static struct builtin_description bdesc_comi
[] =
10719 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, EQ
, 0 },
10720 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, LT
, 0 },
10721 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, LE
, 0 },
10722 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, LT
, 1 },
10723 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, LE
, 1 },
10724 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, NE
, 0 },
10725 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, EQ
, 0 },
10726 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, LT
, 0 },
10727 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, LE
, 0 },
10728 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, LT
, 1 },
10729 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, LE
, 1 },
10730 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, NE
, 0 }
10733 static struct builtin_description bdesc_2arg
[] =
10736 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
10737 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
10738 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
10739 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
10740 { MASK_SSE
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
10741 { MASK_SSE
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
10742 { MASK_SSE
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
10743 { MASK_SSE
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
10745 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
10746 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
10747 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
10748 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
10749 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
10750 { MASK_SSE
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
10751 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
10752 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
10753 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
10754 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
10755 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
10756 { MASK_SSE
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
10757 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
10758 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
10759 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
10760 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS
, LT
, 1 },
10761 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS
, LE
, 1 },
10762 { MASK_SSE
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
10763 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
10764 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
10765 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
10766 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, LT
, 1 },
10767 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, LE
, 1 },
10768 { MASK_SSE
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
10770 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
10771 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
10772 { MASK_SSE
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
10773 { MASK_SSE
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
10775 { MASK_SSE
, CODE_FOR_sse_andti3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
10776 { MASK_SSE
, CODE_FOR_sse_nandti3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
10777 { MASK_SSE
, CODE_FOR_sse_iorti3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
10778 { MASK_SSE
, CODE_FOR_sse_xorti3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
10780 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
10781 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
10782 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
10783 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
10784 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
10787 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
10788 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
10789 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
10790 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
10791 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
10792 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
10794 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
10795 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
10796 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
10797 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
10798 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
10799 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
10800 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
10801 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
10803 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
10804 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
10805 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
10807 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
10808 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
10809 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
10810 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
10812 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
10813 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
10815 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
10816 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
10817 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
10818 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
10819 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
10820 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
10822 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
10823 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
10824 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
10825 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
10827 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
10828 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
10829 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
10830 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
10831 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
10832 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
10835 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
10836 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
10837 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
10839 { MASK_SSE
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
10840 { MASK_SSE
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
10842 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
10843 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
10844 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
10845 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
10846 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
10847 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
10849 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
10850 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
10851 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
10852 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
10853 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
10854 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
10856 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
10857 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
10858 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
10859 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
10861 { MASK_SSE
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
10862 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 }
10866 static struct builtin_description bdesc_1arg
[] =
10868 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
10869 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
10871 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
10872 { MASK_SSE
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
10873 { MASK_SSE
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
10875 { MASK_SSE
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
10876 { MASK_SSE
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
10877 { MASK_SSE
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
10878 { MASK_SSE
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 }
10883 ix86_init_builtins ()
10886 ix86_init_mmx_sse_builtins ();
10889 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10890 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10893 ix86_init_mmx_sse_builtins ()
10895 struct builtin_description
* d
;
10897 tree endlink
= void_list_node
;
10899 tree pchar_type_node
= build_pointer_type (char_type_node
);
10900 tree pfloat_type_node
= build_pointer_type (float_type_node
);
10901 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
10902 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
10905 tree int_ftype_v4sf_v4sf
10906 = build_function_type (integer_type_node
,
10907 tree_cons (NULL_TREE
, V4SF_type_node
,
10908 tree_cons (NULL_TREE
,
10911 tree v4si_ftype_v4sf_v4sf
10912 = build_function_type (V4SI_type_node
,
10913 tree_cons (NULL_TREE
, V4SF_type_node
,
10914 tree_cons (NULL_TREE
,
10917 /* MMX/SSE/integer conversions. */
10918 tree int_ftype_v4sf
10919 = build_function_type (integer_type_node
,
10920 tree_cons (NULL_TREE
, V4SF_type_node
,
10922 tree int_ftype_v8qi
10923 = build_function_type (integer_type_node
,
10924 tree_cons (NULL_TREE
, V8QI_type_node
,
10926 tree int_ftype_v2si
10927 = build_function_type (integer_type_node
,
10928 tree_cons (NULL_TREE
, V2SI_type_node
,
10930 tree v2si_ftype_int
10931 = build_function_type (V2SI_type_node
,
10932 tree_cons (NULL_TREE
, integer_type_node
,
10934 tree v4sf_ftype_v4sf_int
10935 = build_function_type (V4SF_type_node
,
10936 tree_cons (NULL_TREE
, V4SF_type_node
,
10937 tree_cons (NULL_TREE
, integer_type_node
,
10939 tree v4sf_ftype_v4sf_v2si
10940 = build_function_type (V4SF_type_node
,
10941 tree_cons (NULL_TREE
, V4SF_type_node
,
10942 tree_cons (NULL_TREE
, V2SI_type_node
,
10944 tree int_ftype_v4hi_int
10945 = build_function_type (integer_type_node
,
10946 tree_cons (NULL_TREE
, V4HI_type_node
,
10947 tree_cons (NULL_TREE
, integer_type_node
,
10949 tree v4hi_ftype_v4hi_int_int
10950 = build_function_type (V4HI_type_node
,
10951 tree_cons (NULL_TREE
, V4HI_type_node
,
10952 tree_cons (NULL_TREE
, integer_type_node
,
10953 tree_cons (NULL_TREE
,
10956 /* Miscellaneous. */
10957 tree v8qi_ftype_v4hi_v4hi
10958 = build_function_type (V8QI_type_node
,
10959 tree_cons (NULL_TREE
, V4HI_type_node
,
10960 tree_cons (NULL_TREE
, V4HI_type_node
,
10962 tree v4hi_ftype_v2si_v2si
10963 = build_function_type (V4HI_type_node
,
10964 tree_cons (NULL_TREE
, V2SI_type_node
,
10965 tree_cons (NULL_TREE
, V2SI_type_node
,
10967 tree v4sf_ftype_v4sf_v4sf_int
10968 = build_function_type (V4SF_type_node
,
10969 tree_cons (NULL_TREE
, V4SF_type_node
,
10970 tree_cons (NULL_TREE
, V4SF_type_node
,
10971 tree_cons (NULL_TREE
,
10974 tree v4hi_ftype_v8qi_v8qi
10975 = build_function_type (V4HI_type_node
,
10976 tree_cons (NULL_TREE
, V8QI_type_node
,
10977 tree_cons (NULL_TREE
, V8QI_type_node
,
10979 tree v2si_ftype_v4hi_v4hi
10980 = build_function_type (V2SI_type_node
,
10981 tree_cons (NULL_TREE
, V4HI_type_node
,
10982 tree_cons (NULL_TREE
, V4HI_type_node
,
10984 tree v4hi_ftype_v4hi_int
10985 = build_function_type (V4HI_type_node
,
10986 tree_cons (NULL_TREE
, V4HI_type_node
,
10987 tree_cons (NULL_TREE
, integer_type_node
,
10989 tree v4hi_ftype_v4hi_di
10990 = build_function_type (V4HI_type_node
,
10991 tree_cons (NULL_TREE
, V4HI_type_node
,
10992 tree_cons (NULL_TREE
,
10993 long_long_integer_type_node
,
10995 tree v2si_ftype_v2si_di
10996 = build_function_type (V2SI_type_node
,
10997 tree_cons (NULL_TREE
, V2SI_type_node
,
10998 tree_cons (NULL_TREE
,
10999 long_long_integer_type_node
,
11001 tree void_ftype_void
11002 = build_function_type (void_type_node
, endlink
);
11003 tree void_ftype_pchar_int
11004 = build_function_type (void_type_node
,
11005 tree_cons (NULL_TREE
, pchar_type_node
,
11006 tree_cons (NULL_TREE
, integer_type_node
,
11008 tree void_ftype_unsigned
11009 = build_function_type (void_type_node
,
11010 tree_cons (NULL_TREE
, unsigned_type_node
,
11012 tree unsigned_ftype_void
11013 = build_function_type (unsigned_type_node
, endlink
);
11015 = build_function_type (long_long_unsigned_type_node
, endlink
);
11017 = build_function_type (intTI_type_node
, endlink
);
11018 tree v2si_ftype_v4sf
11019 = build_function_type (V2SI_type_node
,
11020 tree_cons (NULL_TREE
, V4SF_type_node
,
11022 /* Loads/stores. */
11023 tree maskmovq_args
= tree_cons (NULL_TREE
, V8QI_type_node
,
11024 tree_cons (NULL_TREE
, V8QI_type_node
,
11025 tree_cons (NULL_TREE
,
11028 tree void_ftype_v8qi_v8qi_pchar
11029 = build_function_type (void_type_node
, maskmovq_args
);
11030 tree v4sf_ftype_pfloat
11031 = build_function_type (V4SF_type_node
,
11032 tree_cons (NULL_TREE
, pfloat_type_node
,
11034 tree v4sf_ftype_float
11035 = build_function_type (V4SF_type_node
,
11036 tree_cons (NULL_TREE
, float_type_node
,
11038 tree v4sf_ftype_float_float_float_float
11039 = build_function_type (V4SF_type_node
,
11040 tree_cons (NULL_TREE
, float_type_node
,
11041 tree_cons (NULL_TREE
, float_type_node
,
11042 tree_cons (NULL_TREE
,
11044 tree_cons (NULL_TREE
,
11047 /* @@@ the type is bogus */
11048 tree v4sf_ftype_v4sf_pv2si
11049 = build_function_type (V4SF_type_node
,
11050 tree_cons (NULL_TREE
, V4SF_type_node
,
11051 tree_cons (NULL_TREE
, pv2si_type_node
,
11053 tree void_ftype_pv2si_v4sf
11054 = build_function_type (void_type_node
,
11055 tree_cons (NULL_TREE
, pv2si_type_node
,
11056 tree_cons (NULL_TREE
, V4SF_type_node
,
11058 tree void_ftype_pfloat_v4sf
11059 = build_function_type (void_type_node
,
11060 tree_cons (NULL_TREE
, pfloat_type_node
,
11061 tree_cons (NULL_TREE
, V4SF_type_node
,
11063 tree void_ftype_pdi_di
11064 = build_function_type (void_type_node
,
11065 tree_cons (NULL_TREE
, pdi_type_node
,
11066 tree_cons (NULL_TREE
,
11067 long_long_unsigned_type_node
,
11069 /* Normal vector unops. */
11070 tree v4sf_ftype_v4sf
11071 = build_function_type (V4SF_type_node
,
11072 tree_cons (NULL_TREE
, V4SF_type_node
,
11075 /* Normal vector binops. */
11076 tree v4sf_ftype_v4sf_v4sf
11077 = build_function_type (V4SF_type_node
,
11078 tree_cons (NULL_TREE
, V4SF_type_node
,
11079 tree_cons (NULL_TREE
, V4SF_type_node
,
11081 tree v8qi_ftype_v8qi_v8qi
11082 = build_function_type (V8QI_type_node
,
11083 tree_cons (NULL_TREE
, V8QI_type_node
,
11084 tree_cons (NULL_TREE
, V8QI_type_node
,
11086 tree v4hi_ftype_v4hi_v4hi
11087 = build_function_type (V4HI_type_node
,
11088 tree_cons (NULL_TREE
, V4HI_type_node
,
11089 tree_cons (NULL_TREE
, V4HI_type_node
,
11091 tree v2si_ftype_v2si_v2si
11092 = build_function_type (V2SI_type_node
,
11093 tree_cons (NULL_TREE
, V2SI_type_node
,
11094 tree_cons (NULL_TREE
, V2SI_type_node
,
11096 tree ti_ftype_ti_ti
11097 = build_function_type (intTI_type_node
,
11098 tree_cons (NULL_TREE
, intTI_type_node
,
11099 tree_cons (NULL_TREE
, intTI_type_node
,
11101 tree di_ftype_di_di
11102 = build_function_type (long_long_unsigned_type_node
,
11103 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
11104 tree_cons (NULL_TREE
,
11105 long_long_unsigned_type_node
,
11108 tree v2si_ftype_v2sf
11109 = build_function_type (V2SI_type_node
,
11110 tree_cons (NULL_TREE
, V2SF_type_node
,
11112 tree v2sf_ftype_v2si
11113 = build_function_type (V2SF_type_node
,
11114 tree_cons (NULL_TREE
, V2SI_type_node
,
11116 tree v2si_ftype_v2si
11117 = build_function_type (V2SI_type_node
,
11118 tree_cons (NULL_TREE
, V2SI_type_node
,
11120 tree v2sf_ftype_v2sf
11121 = build_function_type (V2SF_type_node
,
11122 tree_cons (NULL_TREE
, V2SF_type_node
,
11124 tree v2sf_ftype_v2sf_v2sf
11125 = build_function_type (V2SF_type_node
,
11126 tree_cons (NULL_TREE
, V2SF_type_node
,
11127 tree_cons (NULL_TREE
,
11130 tree v2si_ftype_v2sf_v2sf
11131 = build_function_type (V2SI_type_node
,
11132 tree_cons (NULL_TREE
, V2SF_type_node
,
11133 tree_cons (NULL_TREE
,
11137 tree void_ftype_pchar
11138 = build_function_type (void_type_node
,
11139 tree_cons (NULL_TREE
, pchar_type_node
,
11142 /* Add all builtins that are more or less simple operations on two
11144 for (i
= 0, d
= bdesc_2arg
; i
< sizeof (bdesc_2arg
) / sizeof *d
; i
++, d
++)
11146 /* Use one of the operands; the target can have a different mode for
11147 mask-generating compares. */
11148 enum machine_mode mode
;
11153 mode
= insn_data
[d
->icode
].operand
[1].mode
;
11158 type
= v4sf_ftype_v4sf_v4sf
;
11161 type
= v8qi_ftype_v8qi_v8qi
;
11164 type
= v4hi_ftype_v4hi_v4hi
;
11167 type
= v2si_ftype_v2si_v2si
;
11170 type
= ti_ftype_ti_ti
;
11173 type
= di_ftype_di_di
;
11180 /* Override for comparisons. */
11181 if (d
->icode
== CODE_FOR_maskcmpv4sf3
11182 || d
->icode
== CODE_FOR_maskncmpv4sf3
11183 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
11184 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
11185 type
= v4si_ftype_v4sf_v4sf
;
11187 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
11190 /* Add the remaining MMX insns with somewhat more complicated types. */
11191 def_builtin (MASK_MMX
, "__builtin_ia32_m_from_int", v2si_ftype_int
, IX86_BUILTIN_M_FROM_INT
);
11192 def_builtin (MASK_MMX
, "__builtin_ia32_m_to_int", int_ftype_v2si
, IX86_BUILTIN_M_TO_INT
);
11193 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
11194 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
11195 def_builtin (MASK_MMX
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
11196 def_builtin (MASK_MMX
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
11197 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
11198 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
11199 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
11201 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
11202 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
11203 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
11205 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
11206 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
11208 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
11209 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
11211 /* comi/ucomi insns. */
11212 for (i
= 0, d
= bdesc_comi
; i
< sizeof (bdesc_comi
) / sizeof *d
; i
++, d
++)
11213 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
11215 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
11216 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
11217 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
11219 def_builtin (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
11220 def_builtin (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
11221 def_builtin (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
11222 def_builtin (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
11223 def_builtin (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
11224 def_builtin (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
11226 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
11227 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
11229 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
11231 def_builtin (MASK_SSE
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
11232 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
11233 def_builtin (MASK_SSE
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
11234 def_builtin (MASK_SSE
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
11235 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
11236 def_builtin (MASK_SSE
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
11238 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
11239 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
11240 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
11241 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
11243 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
11244 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
11245 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
11246 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
11248 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
11249 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_prefetch", void_ftype_pchar_int
, IX86_BUILTIN_PREFETCH
);
11251 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
11253 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
11254 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
11255 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
11256 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
11257 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
11258 def_builtin (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
11260 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
11262 /* Original 3DNow! */
11263 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
11264 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
11265 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
11266 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
11267 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
11268 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
11269 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
11270 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
11271 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
11272 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
11273 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
11274 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
11275 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
11276 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
11277 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
11278 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
11279 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
11280 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
11281 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
11282 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
11283 def_builtin (MASK_3DNOW
, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar
, IX86_BUILTIN_PREFETCH_3DNOW
);
11284 def_builtin (MASK_3DNOW
, "__builtin_ia32_prefetchw", void_ftype_pchar
, IX86_BUILTIN_PREFETCHW
);
11286 /* 3DNow! extension as used in the Athlon CPU. */
11287 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
11288 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
11289 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
11290 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
11291 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
11292 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
11294 /* Composite intrinsics. */
11295 def_builtin (MASK_SSE
, "__builtin_ia32_setps1", v4sf_ftype_float
, IX86_BUILTIN_SETPS1
);
11296 def_builtin (MASK_SSE
, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float
, IX86_BUILTIN_SETPS
);
11297 def_builtin (MASK_SSE
, "__builtin_ia32_setzerops", ti_ftype_void
, IX86_BUILTIN_CLRPS
);
11298 def_builtin (MASK_SSE
, "__builtin_ia32_loadps1", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADPS1
);
11299 def_builtin (MASK_SSE
, "__builtin_ia32_loadrps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADRPS
);
11300 def_builtin (MASK_SSE
, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREPS1
);
11301 def_builtin (MASK_SSE
, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORERPS
);
11304 /* Errors in the source file can cause expand_expr to return const0_rtx
11305 where we expect a vector. To avoid crashing, use one of the vector
11306 clear instructions. */
11308 safe_vector_operand (x
, mode
)
11310 enum machine_mode mode
;
11312 if (x
!= const0_rtx
)
11314 x
= gen_reg_rtx (mode
);
11316 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
11317 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
11318 : gen_rtx_SUBREG (DImode
, x
, 0)));
11320 emit_insn (gen_sse_clrti (mode
== TImode
? x
11321 : gen_rtx_SUBREG (TImode
, x
, 0)));
11325 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11328 ix86_expand_binop_builtin (icode
, arglist
, target
)
11329 enum insn_code icode
;
11334 tree arg0
= TREE_VALUE (arglist
);
11335 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11336 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11337 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11338 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
11339 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
11340 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
11342 if (VECTOR_MODE_P (mode0
))
11343 op0
= safe_vector_operand (op0
, mode0
);
11344 if (VECTOR_MODE_P (mode1
))
11345 op1
= safe_vector_operand (op1
, mode1
);
11348 || GET_MODE (target
) != tmode
11349 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11350 target
= gen_reg_rtx (tmode
);
11352 /* In case the insn wants input operands in modes different from
11353 the result, abort. */
11354 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
11357 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11358 op0
= copy_to_mode_reg (mode0
, op0
);
11359 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11360 op1
= copy_to_mode_reg (mode1
, op1
);
11362 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
11369 /* Subroutine of ix86_expand_builtin to take care of stores. */
11372 ix86_expand_store_builtin (icode
, arglist
, shuffle
)
11373 enum insn_code icode
;
11378 tree arg0
= TREE_VALUE (arglist
);
11379 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11380 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11381 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11382 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
11383 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
11385 if (VECTOR_MODE_P (mode1
))
11386 op1
= safe_vector_operand (op1
, mode1
);
11388 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
11389 if (shuffle
>= 0 || ! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
11390 op1
= copy_to_mode_reg (mode1
, op1
);
11392 emit_insn (gen_sse_shufps (op1
, op1
, op1
, GEN_INT (shuffle
)));
11393 pat
= GEN_FCN (icode
) (op0
, op1
);
11399 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
11402 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
11403 enum insn_code icode
;
11409 tree arg0
= TREE_VALUE (arglist
);
11410 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11411 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
11412 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
11415 || GET_MODE (target
) != tmode
11416 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11417 target
= gen_reg_rtx (tmode
);
11419 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
11422 if (VECTOR_MODE_P (mode0
))
11423 op0
= safe_vector_operand (op0
, mode0
);
11425 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11426 op0
= copy_to_mode_reg (mode0
, op0
);
11429 pat
= GEN_FCN (icode
) (target
, op0
);
11436 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11437 sqrtss, rsqrtss, rcpss. */
11440 ix86_expand_unop1_builtin (icode
, arglist
, target
)
11441 enum insn_code icode
;
11446 tree arg0
= TREE_VALUE (arglist
);
11447 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11448 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
11449 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
11452 || GET_MODE (target
) != tmode
11453 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11454 target
= gen_reg_rtx (tmode
);
11456 if (VECTOR_MODE_P (mode0
))
11457 op0
= safe_vector_operand (op0
, mode0
);
11459 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11460 op0
= copy_to_mode_reg (mode0
, op0
);
11462 pat
= GEN_FCN (icode
) (target
, op0
, op0
);
11469 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11472 ix86_expand_sse_compare (d
, arglist
, target
)
11473 struct builtin_description
*d
;
11478 tree arg0
= TREE_VALUE (arglist
);
11479 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11480 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11481 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11483 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
11484 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
11485 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
11486 enum rtx_code comparison
= d
->comparison
;
11488 if (VECTOR_MODE_P (mode0
))
11489 op0
= safe_vector_operand (op0
, mode0
);
11490 if (VECTOR_MODE_P (mode1
))
11491 op1
= safe_vector_operand (op1
, mode1
);
11493 /* Swap operands if we have a comparison that isn't available in
11497 rtx tmp
= gen_reg_rtx (mode1
);
11498 emit_move_insn (tmp
, op1
);
11504 || GET_MODE (target
) != tmode
11505 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
11506 target
= gen_reg_rtx (tmode
);
11508 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
11509 op0
= copy_to_mode_reg (mode0
, op0
);
11510 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
11511 op1
= copy_to_mode_reg (mode1
, op1
);
11513 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
11514 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
11521 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
11524 ix86_expand_sse_comi (d
, arglist
, target
)
11525 struct builtin_description
*d
;
11530 tree arg0
= TREE_VALUE (arglist
);
11531 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11532 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11533 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11535 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
11536 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
11537 enum rtx_code comparison
= d
->comparison
;
11539 if (VECTOR_MODE_P (mode0
))
11540 op0
= safe_vector_operand (op0
, mode0
);
11541 if (VECTOR_MODE_P (mode1
))
11542 op1
= safe_vector_operand (op1
, mode1
);
11544 /* Swap operands if we have a comparison that isn't available in
11553 target
= gen_reg_rtx (SImode
);
11554 emit_move_insn (target
, const0_rtx
);
11555 target
= gen_rtx_SUBREG (QImode
, target
, 0);
11557 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
11558 op0
= copy_to_mode_reg (mode0
, op0
);
11559 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
11560 op1
= copy_to_mode_reg (mode1
, op1
);
11562 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
11563 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
11567 emit_insn (gen_setcc_2 (target
, op2
));
11572 /* Expand an expression EXP that calls a built-in function,
11573 with result going to TARGET if that's convenient
11574 (and in mode MODE if that's convenient).
11575 SUBTARGET may be used as the target for computing one of EXP's operands.
11576 IGNORE is nonzero if the value is to be ignored. */
11579 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
11582 rtx subtarget ATTRIBUTE_UNUSED
;
11583 enum machine_mode mode ATTRIBUTE_UNUSED
;
11584 int ignore ATTRIBUTE_UNUSED
;
11586 struct builtin_description
*d
;
11588 enum insn_code icode
;
11589 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
11590 tree arglist
= TREE_OPERAND (exp
, 1);
11591 tree arg0
, arg1
, arg2
, arg3
;
11592 rtx op0
, op1
, op2
, pat
;
11593 enum machine_mode tmode
, mode0
, mode1
, mode2
;
11594 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
11598 case IX86_BUILTIN_EMMS
:
11599 emit_insn (gen_emms ());
11602 case IX86_BUILTIN_SFENCE
:
11603 emit_insn (gen_sfence ());
11606 case IX86_BUILTIN_M_FROM_INT
:
11607 target
= gen_reg_rtx (DImode
);
11608 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
11609 emit_move_insn (gen_rtx_SUBREG (SImode
, target
, 0), op0
);
11612 case IX86_BUILTIN_M_TO_INT
:
11613 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
11614 op0
= copy_to_mode_reg (DImode
, op0
);
11615 target
= gen_reg_rtx (SImode
);
11616 emit_move_insn (target
, gen_rtx_SUBREG (SImode
, op0
, 0));
11619 case IX86_BUILTIN_PEXTRW
:
11620 icode
= CODE_FOR_mmx_pextrw
;
11621 arg0
= TREE_VALUE (arglist
);
11622 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11623 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11624 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11625 tmode
= insn_data
[icode
].operand
[0].mode
;
11626 mode0
= insn_data
[icode
].operand
[1].mode
;
11627 mode1
= insn_data
[icode
].operand
[2].mode
;
11629 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11630 op0
= copy_to_mode_reg (mode0
, op0
);
11631 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11633 /* @@@ better error message */
11634 error ("selector must be an immediate");
11638 || GET_MODE (target
) != tmode
11639 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11640 target
= gen_reg_rtx (tmode
);
11641 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
11647 case IX86_BUILTIN_PINSRW
:
11648 icode
= CODE_FOR_mmx_pinsrw
;
11649 arg0
= TREE_VALUE (arglist
);
11650 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11651 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
11652 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11653 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11654 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
11655 tmode
= insn_data
[icode
].operand
[0].mode
;
11656 mode0
= insn_data
[icode
].operand
[1].mode
;
11657 mode1
= insn_data
[icode
].operand
[2].mode
;
11658 mode2
= insn_data
[icode
].operand
[3].mode
;
11660 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11661 op0
= copy_to_mode_reg (mode0
, op0
);
11662 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11663 op1
= copy_to_mode_reg (mode1
, op1
);
11664 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
11666 /* @@@ better error message */
11667 error ("selector must be an immediate");
11671 || GET_MODE (target
) != tmode
11672 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11673 target
= gen_reg_rtx (tmode
);
11674 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
11680 case IX86_BUILTIN_MASKMOVQ
:
11681 icode
= CODE_FOR_mmx_maskmovq
;
11682 /* Note the arg order is different from the operand order. */
11683 arg1
= TREE_VALUE (arglist
);
11684 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
11685 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
11686 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11687 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11688 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
11689 mode0
= insn_data
[icode
].operand
[0].mode
;
11690 mode1
= insn_data
[icode
].operand
[1].mode
;
11691 mode2
= insn_data
[icode
].operand
[2].mode
;
11693 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11694 op0
= copy_to_mode_reg (mode0
, op0
);
11695 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
11696 op1
= copy_to_mode_reg (mode1
, op1
);
11697 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
11698 op2
= copy_to_mode_reg (mode2
, op2
);
11699 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
11705 case IX86_BUILTIN_SQRTSS
:
11706 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
11707 case IX86_BUILTIN_RSQRTSS
:
11708 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
11709 case IX86_BUILTIN_RCPSS
:
11710 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
11712 case IX86_BUILTIN_LOADAPS
:
11713 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
11715 case IX86_BUILTIN_LOADUPS
:
11716 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
11718 case IX86_BUILTIN_STOREAPS
:
11719 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, -1);
11720 case IX86_BUILTIN_STOREUPS
:
11721 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
, -1);
11723 case IX86_BUILTIN_LOADSS
:
11724 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
11726 case IX86_BUILTIN_STORESS
:
11727 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
, -1);
11729 case IX86_BUILTIN_LOADHPS
:
11730 case IX86_BUILTIN_LOADLPS
:
11731 icode
= (fcode
== IX86_BUILTIN_LOADHPS
11732 ? CODE_FOR_sse_movhps
: CODE_FOR_sse_movlps
);
11733 arg0
= TREE_VALUE (arglist
);
11734 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11735 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11736 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11737 tmode
= insn_data
[icode
].operand
[0].mode
;
11738 mode0
= insn_data
[icode
].operand
[1].mode
;
11739 mode1
= insn_data
[icode
].operand
[2].mode
;
11741 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11742 op0
= copy_to_mode_reg (mode0
, op0
);
11743 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
11745 || GET_MODE (target
) != tmode
11746 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11747 target
= gen_reg_rtx (tmode
);
11748 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
11754 case IX86_BUILTIN_STOREHPS
:
11755 case IX86_BUILTIN_STORELPS
:
11756 icode
= (fcode
== IX86_BUILTIN_STOREHPS
11757 ? CODE_FOR_sse_movhps
: CODE_FOR_sse_movlps
);
11758 arg0
= TREE_VALUE (arglist
);
11759 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11760 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11761 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11762 mode0
= insn_data
[icode
].operand
[1].mode
;
11763 mode1
= insn_data
[icode
].operand
[2].mode
;
11765 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
11766 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11767 op1
= copy_to_mode_reg (mode1
, op1
);
11769 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
11775 case IX86_BUILTIN_MOVNTPS
:
11776 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
, -1);
11777 case IX86_BUILTIN_MOVNTQ
:
11778 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
, -1);
11780 case IX86_BUILTIN_LDMXCSR
:
11781 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
11782 target
= assign_386_stack_local (SImode
, 0);
11783 emit_move_insn (target
, op0
);
11784 emit_insn (gen_ldmxcsr (target
));
11787 case IX86_BUILTIN_STMXCSR
:
11788 target
= assign_386_stack_local (SImode
, 0);
11789 emit_insn (gen_stmxcsr (target
));
11790 return copy_to_mode_reg (SImode
, target
);
11792 case IX86_BUILTIN_PREFETCH
:
11793 icode
= CODE_FOR_prefetch
;
11794 arg0
= TREE_VALUE (arglist
);
11795 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11796 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11797 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11798 mode0
= insn_data
[icode
].operand
[0].mode
;
11799 mode1
= insn_data
[icode
].operand
[1].mode
;
11801 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
11803 /* @@@ better error message */
11804 error ("selector must be an immediate");
11808 op0
= copy_to_mode_reg (Pmode
, op0
);
11809 pat
= GEN_FCN (icode
) (op0
, op1
);
11815 case IX86_BUILTIN_SHUFPS
:
11816 icode
= CODE_FOR_sse_shufps
;
11817 arg0
= TREE_VALUE (arglist
);
11818 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11819 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
11820 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11821 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11822 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
11823 tmode
= insn_data
[icode
].operand
[0].mode
;
11824 mode0
= insn_data
[icode
].operand
[1].mode
;
11825 mode1
= insn_data
[icode
].operand
[2].mode
;
11826 mode2
= insn_data
[icode
].operand
[3].mode
;
11828 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11829 op0
= copy_to_mode_reg (mode0
, op0
);
11830 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11831 op1
= copy_to_mode_reg (mode1
, op1
);
11832 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
11834 /* @@@ better error message */
11835 error ("mask must be an immediate");
11839 || GET_MODE (target
) != tmode
11840 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11841 target
= gen_reg_rtx (tmode
);
11842 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
11848 case IX86_BUILTIN_PSHUFW
:
11849 icode
= CODE_FOR_mmx_pshufw
;
11850 arg0
= TREE_VALUE (arglist
);
11851 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11852 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11853 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11854 tmode
= insn_data
[icode
].operand
[0].mode
;
11855 mode0
= insn_data
[icode
].operand
[2].mode
;
11856 mode1
= insn_data
[icode
].operand
[3].mode
;
11858 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11859 op0
= copy_to_mode_reg (mode0
, op0
);
11860 if (! (*insn_data
[icode
].operand
[3].predicate
) (op1
, mode1
))
11862 /* @@@ better error message */
11863 error ("mask must be an immediate");
11867 || GET_MODE (target
) != tmode
11868 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11869 target
= gen_reg_rtx (tmode
);
11870 pat
= GEN_FCN (icode
) (target
, target
, op0
, op1
);
11876 case IX86_BUILTIN_FEMMS
:
11877 emit_insn (gen_femms ());
11880 case IX86_BUILTIN_PAVGUSB
:
11881 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
11883 case IX86_BUILTIN_PF2ID
:
11884 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
11886 case IX86_BUILTIN_PFACC
:
11887 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
11889 case IX86_BUILTIN_PFADD
:
11890 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
11892 case IX86_BUILTIN_PFCMPEQ
:
11893 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
11895 case IX86_BUILTIN_PFCMPGE
:
11896 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
11898 case IX86_BUILTIN_PFCMPGT
:
11899 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
11901 case IX86_BUILTIN_PFMAX
:
11902 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
11904 case IX86_BUILTIN_PFMIN
:
11905 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
11907 case IX86_BUILTIN_PFMUL
:
11908 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
11910 case IX86_BUILTIN_PFRCP
:
11911 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
11913 case IX86_BUILTIN_PFRCPIT1
:
11914 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
11916 case IX86_BUILTIN_PFRCPIT2
:
11917 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
11919 case IX86_BUILTIN_PFRSQIT1
:
11920 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
11922 case IX86_BUILTIN_PFRSQRT
:
11923 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
11925 case IX86_BUILTIN_PFSUB
:
11926 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
11928 case IX86_BUILTIN_PFSUBR
:
11929 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
11931 case IX86_BUILTIN_PI2FD
:
11932 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
11934 case IX86_BUILTIN_PMULHRW
:
11935 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
11937 case IX86_BUILTIN_PREFETCH_3DNOW
:
11938 icode
= CODE_FOR_prefetch_3dnow
;
11939 arg0
= TREE_VALUE (arglist
);
11940 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11941 mode0
= insn_data
[icode
].operand
[0].mode
;
11942 pat
= GEN_FCN (icode
) (copy_to_mode_reg (Pmode
, op0
));
11948 case IX86_BUILTIN_PREFETCHW
:
11949 icode
= CODE_FOR_prefetchw
;
11950 arg0
= TREE_VALUE (arglist
);
11951 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11952 mode0
= insn_data
[icode
].operand
[0].mode
;
11953 pat
= GEN_FCN (icode
) (copy_to_mode_reg (Pmode
, op0
));
11959 case IX86_BUILTIN_PF2IW
:
11960 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
11962 case IX86_BUILTIN_PFNACC
:
11963 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
11965 case IX86_BUILTIN_PFPNACC
:
11966 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
11968 case IX86_BUILTIN_PI2FW
:
11969 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
11971 case IX86_BUILTIN_PSWAPDSI
:
11972 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
11974 case IX86_BUILTIN_PSWAPDSF
:
11975 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
11977 /* Composite intrinsics. */
11978 case IX86_BUILTIN_SETPS1
:
11979 target
= assign_386_stack_local (SFmode
, 0);
11980 arg0
= TREE_VALUE (arglist
);
11981 emit_move_insn (adjust_address (target
, SFmode
, 0),
11982 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
11983 op0
= gen_reg_rtx (V4SFmode
);
11984 emit_insn (gen_sse_loadss (op0
, adjust_address (target
, V4SFmode
, 0)));
11985 emit_insn (gen_sse_shufps (op0
, op0
, op0
, GEN_INT (0)));
11988 case IX86_BUILTIN_SETPS
:
11989 target
= assign_386_stack_local (V4SFmode
, 0);
11990 arg0
= TREE_VALUE (arglist
);
11991 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11992 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
11993 arg3
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
11994 emit_move_insn (adjust_address (target
, SFmode
, 0),
11995 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
11996 emit_move_insn (adjust_address (target
, SFmode
, 4),
11997 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
11998 emit_move_insn (adjust_address (target
, SFmode
, 8),
11999 expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0));
12000 emit_move_insn (adjust_address (target
, SFmode
, 12),
12001 expand_expr (arg3
, NULL_RTX
, VOIDmode
, 0));
12002 op0
= gen_reg_rtx (V4SFmode
);
12003 emit_insn (gen_sse_movaps (op0
, target
));
12006 case IX86_BUILTIN_CLRPS
:
12007 target
= gen_reg_rtx (TImode
);
12008 emit_insn (gen_sse_clrti (target
));
12011 case IX86_BUILTIN_LOADRPS
:
12012 target
= ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
,
12013 gen_reg_rtx (V4SFmode
), 1);
12014 emit_insn (gen_sse_shufps (target
, target
, target
, GEN_INT (0x1b)));
12017 case IX86_BUILTIN_LOADPS1
:
12018 target
= ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
,
12019 gen_reg_rtx (V4SFmode
), 1);
12020 emit_insn (gen_sse_shufps (target
, target
, target
, const0_rtx
));
12023 case IX86_BUILTIN_STOREPS1
:
12024 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, 0);
12025 case IX86_BUILTIN_STORERPS
:
12026 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
, 0x1B);
12028 case IX86_BUILTIN_MMX_ZERO
:
12029 target
= gen_reg_rtx (DImode
);
12030 emit_insn (gen_mmx_clrdi (target
));
12037 for (i
= 0, d
= bdesc_2arg
; i
< sizeof (bdesc_2arg
) / sizeof *d
; i
++, d
++)
12038 if (d
->code
== fcode
)
12040 /* Compares are treated specially. */
12041 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12042 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12043 || d
->icode
== CODE_FOR_maskncmpv4sf3
12044 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12045 return ix86_expand_sse_compare (d
, arglist
, target
);
12047 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
12050 for (i
= 0, d
= bdesc_1arg
; i
< sizeof (bdesc_1arg
) / sizeof *d
; i
++, d
++)
12051 if (d
->code
== fcode
)
12052 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
12054 for (i
= 0, d
= bdesc_comi
; i
< sizeof (bdesc_comi
) / sizeof *d
; i
++, d
++)
12055 if (d
->code
== fcode
)
12056 return ix86_expand_sse_comi (d
, arglist
, target
);
12058 /* @@@ Should really do something sensible here. */
12062 /* Store OPERAND to the memory after reload is completed. This means
12063 that we can't easilly use assign_stack_local. */
12065 ix86_force_to_memory (mode
, operand
)
12066 enum machine_mode mode
;
12070 if (!reload_completed
)
12072 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
12074 result
= gen_rtx_MEM (mode
,
12075 gen_rtx_PLUS (Pmode
,
12077 GEN_INT (-RED_ZONE_SIZE
)));
12078 emit_move_insn (result
, operand
);
12080 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
12086 operand
= gen_lowpart (DImode
, operand
);
12090 gen_rtx_SET (VOIDmode
,
12091 gen_rtx_MEM (DImode
,
12092 gen_rtx_PRE_DEC (DImode
,
12093 stack_pointer_rtx
)),
12099 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
12108 split_di (&operand
, 1, operands
, operands
+ 1);
12110 gen_rtx_SET (VOIDmode
,
12111 gen_rtx_MEM (SImode
,
12112 gen_rtx_PRE_DEC (Pmode
,
12113 stack_pointer_rtx
)),
12116 gen_rtx_SET (VOIDmode
,
12117 gen_rtx_MEM (SImode
,
12118 gen_rtx_PRE_DEC (Pmode
,
12119 stack_pointer_rtx
)),
12124 /* It is better to store HImodes as SImodes. */
12125 if (!TARGET_PARTIAL_REG_STALL
)
12126 operand
= gen_lowpart (SImode
, operand
);
12130 gen_rtx_SET (VOIDmode
,
12131 gen_rtx_MEM (GET_MODE (operand
),
12132 gen_rtx_PRE_DEC (SImode
,
12133 stack_pointer_rtx
)),
12139 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
12144 /* Free operand from the memory. */
12146 ix86_free_from_memory (mode
)
12147 enum machine_mode mode
;
12149 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
12153 if (mode
== DImode
|| TARGET_64BIT
)
12155 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
12159 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12160 to pop or add instruction if registers are available. */
12161 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
12162 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
12167 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12168 QImode must go into class Q_REGS.
12169 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12170 movdf to do mem-to-mem moves through integer regs. */
12172 ix86_preferred_reload_class (x
, class)
12174 enum reg_class
class;
12176 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
12178 /* SSE can't load any constant directly yet. */
12179 if (SSE_CLASS_P (class))
12181 /* Floats can load 0 and 1. */
12182 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
12184 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12185 if (MAYBE_SSE_CLASS_P (class))
12186 return (reg_class_subset_p (class, GENERAL_REGS
)
12187 ? GENERAL_REGS
: FLOAT_REGS
);
12191 /* General regs can load everything. */
12192 if (reg_class_subset_p (class, GENERAL_REGS
))
12193 return GENERAL_REGS
;
12194 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12195 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12198 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
12200 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
12205 /* If we are copying between general and FP registers, we need a memory
12206 location. The same is true for SSE and MMX registers.
12208 The macro can't work reliably when one of the CLASSES is class containing
12209 registers from multiple units (SSE, MMX, integer). We avoid this by never
12210 combining those units in single alternative in the machine description.
12211 Ensure that this constraint holds to avoid unexpected surprises.
12213 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12214 enforce these sanity checks. */
12216 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
12217 enum reg_class class1
, class2
;
12218 enum machine_mode mode
;
12221 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
12222 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
12223 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
12224 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
12225 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
12226 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
12233 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
12234 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
12235 && (mode
) != SImode
)
12236 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
12237 && (mode
) != SImode
));
12239 /* Return the cost of moving data from a register in class CLASS1 to
12240 one in class CLASS2.
12242 It is not required that the cost always equal 2 when FROM is the same as TO;
12243 on some machines it is expensive to move between registers if they are not
12244 general registers. */
12246 ix86_register_move_cost (mode
, class1
, class2
)
12247 enum machine_mode mode
;
12248 enum reg_class class1
, class2
;
12250 /* In case we require secondary memory, compute cost of the store followed
12251 by load. In case of copying from general_purpose_register we may emit
12252 multiple stores followed by single load causing memory size mismatch
12253 stall. Count this as arbitarily high cost of 20. */
12254 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
12257 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
12259 return (MEMORY_MOVE_COST (mode
, class1
, 0)
12260 + MEMORY_MOVE_COST (mode
, class2
, 1) + add_cost
);
12262 /* Moves between SSE/MMX and integer unit are expensive. */
12263 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
12264 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
12265 return ix86_cost
->mmxsse_to_integer
;
12266 if (MAYBE_FLOAT_CLASS_P (class1
))
12267 return ix86_cost
->fp_move
;
12268 if (MAYBE_SSE_CLASS_P (class1
))
12269 return ix86_cost
->sse_move
;
12270 if (MAYBE_MMX_CLASS_P (class1
))
12271 return ix86_cost
->mmx_move
;
12275 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12277 ix86_hard_regno_mode_ok (regno
, mode
)
12279 enum machine_mode mode
;
12281 /* Flags and only flags can only hold CCmode values. */
12282 if (CC_REGNO_P (regno
))
12283 return GET_MODE_CLASS (mode
) == MODE_CC
;
12284 if (GET_MODE_CLASS (mode
) == MODE_CC
12285 || GET_MODE_CLASS (mode
) == MODE_RANDOM
12286 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
12288 if (FP_REGNO_P (regno
))
12289 return VALID_FP_MODE_P (mode
);
12290 if (SSE_REGNO_P (regno
))
12291 return VALID_SSE_REG_MODE (mode
);
12292 if (MMX_REGNO_P (regno
))
12293 return VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
);
12294 /* We handle both integer and floats in the general purpose registers.
12295 In future we should be able to handle vector modes as well. */
12296 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
12298 /* Take care for QImode values - they can be in non-QI regs, but then
12299 they do cause partial register stalls. */
12300 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
12302 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
12305 /* Return the cost of moving data of mode M between a
12306 register and memory. A value of 2 is the default; this cost is
12307 relative to those in `REGISTER_MOVE_COST'.
12309 If moving between registers and memory is more expensive than
12310 between two registers, you should define this macro to express the
12313 Model also increased moving costs of QImode registers in non
12317 ix86_memory_move_cost (mode
, class, in
)
12318 enum machine_mode mode
;
12319 enum reg_class
class;
12322 if (FLOAT_CLASS_P (class))
12340 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
12342 if (SSE_CLASS_P (class))
12345 switch (GET_MODE_SIZE (mode
))
12359 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
12361 if (MMX_CLASS_P (class))
12364 switch (GET_MODE_SIZE (mode
))
12375 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
12377 switch (GET_MODE_SIZE (mode
))
12381 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
12382 : ix86_cost
->movzbl_load
);
12384 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
12385 : ix86_cost
->int_store
[0] + 4);
12388 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
12390 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
12391 if (mode
== TFmode
)
12393 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
12394 * (int) GET_MODE_SIZE (mode
) / 4);
12398 #ifdef DO_GLOBAL_CTORS_BODY
12400 ix86_svr3_asm_out_constructor (symbol
, priority
)
12402 int priority ATTRIBUTE_UNUSED
;
12405 fputs ("\tpushl $", asm_out_file
);
12406 assemble_name (asm_out_file
, XSTR (symbol
, 0));
12407 fputc ('\n', asm_out_file
);
12411 #if defined(TARGET_ELF) && defined(TARGET_COFF)
12413 sco_asm_named_section (name
, flags
)
12415 unsigned int flags
;
12418 default_elf_asm_named_section (name
, flags
);
12420 default_coff_asm_named_section (name
, flags
);
12424 sco_asm_out_constructor (symbol
, priority
)
12429 default_named_section_asm_out_constrctor (symbol
, priority
);
12431 ix86_svr3_asm_out_constructor (symbol
, priority
);