1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
/* Default stack-checking limit: -1 disables the stack probe.  The
   conditional lets a target configuration header override it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost
= { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
/* NOTE(review): the '"large" insn' and MOVE_RATIO fields appear to have
   been dropped here in this copy of the file -- restore from upstream.  */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
/* NOTE(review): this copy appears to have lost the "static const" line
   above and the closing "};" of this initializer.  */
95 /* Processor costs (relative to an add) */
97 struct processor_costs i386_cost
= { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
/* NOTE(review): a MOVE_RATIO field appears to have been dropped here in
   this copy of the file -- restore from upstream.  */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of storing fp registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
/* NOTE(review): this copy appears to have lost the "static const" line
   above and the closing "};" of this initializer.  */
140 struct processor_costs i486_cost
= { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
/* NOTE(review): a MOVE_RATIO field appears to have been dropped here in
   this copy of the file -- restore from upstream.  */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
/* NOTE(review): this copy appears to have lost the "static const" line
   above and the closing "};" of this initializer.  */
183 struct processor_costs pentium_cost
= {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
/* NOTE(review): a MOVE_RATIO field appears to have been dropped here in
   this copy of the file -- restore from upstream.  */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of storing fp registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
/* NOTE(review): this copy appears to have lost the "static const" line
   above and the closing "};" of this initializer.  */
226 struct processor_costs pentiumpro_cost
= {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
/* NOTE(review): a MOVE_RATIO field appears to have been dropped here in
   this copy of the file -- restore from upstream.  */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of storing fp registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
/* NOTE(review): this copy appears to have lost the "static const" line
   above and the closing "};" of this initializer.  */
269 struct processor_costs k6_cost
= {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
/* NOTE(review): a MOVE_RATIO field appears to have been dropped here in
   this copy of the file -- restore from upstream.  */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of storing fp registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
/* NOTE(review): this copy appears to have lost the "static const" line
   above and the closing "};" of this initializer.  */
312 struct processor_costs athlon_cost
= {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
/* NOTE(review): a MOVE_RATIO field appears to have been dropped here in
   this copy of the file -- restore from upstream.  */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
/* NOTE(review): this copy appears to have lost the "static const" line
   above and the closing "};" of this initializer.  */
355 struct processor_costs pentium4_cost
= {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
/* NOTE(review): a MOVE_RATIO field appears to have been dropped here in
   this copy of the file -- restore from upstream.  */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
/* NOTE(review): this copy appears to have lost the "static const" line
   above and the closing "};" of this initializer.  */
397 const struct processor_costs
*ix86_cost
= &pentium_cost
;
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
408 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
409 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
410 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
411 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
412 const int x86_double_with_add
= ~m_386
;
413 const int x86_use_bit_test
= m_386
;
414 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
415 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
416 const int x86_3dnow_a
= m_ATHLON
;
417 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
418 const int x86_branch_hints
= m_PENT4
;
419 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
420 const int x86_partial_reg_stall
= m_PPRO
;
421 const int x86_use_loop
= m_K6
;
422 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
423 const int x86_use_mov0
= m_K6
;
424 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
425 const int x86_read_modify_write
= ~m_PENT
;
426 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
427 const int x86_split_long_moves
= m_PPRO
;
428 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON
;
429 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
430 const int x86_single_stringop
= m_386
| m_PENT4
;
431 const int x86_qimode_math
= ~(0);
432 const int x86_promote_qi_regs
= 0;
433 const int x86_himode_math
= ~(m_PPRO
);
434 const int x86_promote_hi_regs
= m_PPRO
;
435 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
436 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
437 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
438 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
439 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
| m_PPRO
);
440 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
441 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
442 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
443 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
444 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
445 const int x86_decompose_lea
= m_PENT4
;
446 const int x86_shift1
= ~m_486
;
447 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON
| m_PENT4
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
460 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
461 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): this initializer appears to have lost its surrounding
   braces and several rows in this copy of the file -- restore from the
   upstream source before building.  */
466 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
469 AREG
, DREG
, CREG
, BREG
,
471 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
473 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
474 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
479 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
481 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
483 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
484 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
485 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
489 /* The "default" register map used in 32bit mode. */
491 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
502 static int const x86_64_int_parameter_registers
[6] =
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
/* GCC hard-register numbers of the integer value-return registers.
   Note: in this file's register numbering AREG=0, DREG=1, DIREG=5,
   SIREG=4, so regno 1 is %rdx (the original comment said RDI twice).  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
   18 for %st(7)  (gcc regno = 15)  */
579 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
593 rtx ix86_compare_op0
= NULL_RTX
;
594 rtx ix86_compare_op1
= NULL_RTX
;
/* The encoding characters for the four TLS models present in ELF.  */
static char const tls_model_chars[] = " GLil";
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function
GTY(())
607 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
608 const char *some_ld_name
;
609 int save_varrargs_registers
;
610 int accesses_prev_frame
;
#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
616 /* Structure describing stack frame layout.
617 Stack grows downward:
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
630 > to_allocate <- FRAME_POINTER
   NOTE(review): most of this layout diagram, the "struct ix86_frame {"
   header line, and several leading fields were lost in this copy of the
   file -- restore from the upstream source before building.  */
642 int outgoing_arguments_size
;
645 HOST_WIDE_INT to_allocate
;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset
;
648 HOST_WIDE_INT hard_frame_pointer_offset
;
649 HOST_WIDE_INT stack_pointer_offset
;
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string
;
657 enum cmodel ix86_cmodel
;
659 const char *ix86_asm_string
;
660 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
662 const char *ix86_tls_dialect_string
;
663 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath
;
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu
;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch
;
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string
; /* for -march=<xxx> */
676 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string
;
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse
;
684 /* ix86_regparm_string as a number */
687 /* Alignment to use for loops and jumps: */
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string
;
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string
;
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string
;
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary
;
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost
;
703 const char *ix86_branch_cost_string
;
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string
;
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix
[16];
710 static int internal_label_prefix_len
;
712 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
713 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
714 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
715 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
717 static const char *get_some_local_dynamic_name
PARAMS ((void));
718 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
719 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
720 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
721 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
723 static rtx get_thread_pointer
PARAMS ((void));
724 static void get_pc_thunk_name
PARAMS ((char [32], unsigned int));
725 static rtx gen_push
PARAMS ((rtx
));
726 static int memory_address_length
PARAMS ((rtx addr
));
727 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
728 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
729 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
730 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
731 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
732 static struct machine_function
* ix86_init_machine_status
PARAMS ((void));
733 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
734 static int ix86_nsaved_regs
PARAMS ((void));
735 static void ix86_emit_save_regs
PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
737 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
738 static void ix86_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
739 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
740 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
741 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
742 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
743 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
744 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
745 static int ix86_issue_rate
PARAMS ((void));
746 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
747 static void ix86_sched_init
PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
749 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
750 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
753 static rtx x86_this_parameter
PARAMS ((tree
));
754 static void x86_output_mi_thunk
PARAMS ((FILE *, tree
, HOST_WIDE_INT
,
755 HOST_WIDE_INT
, tree
));
756 static bool x86_can_output_mi_thunk
PARAMS ((tree
, HOST_WIDE_INT
,
757 HOST_WIDE_INT
, tree
));
761 rtx base
, index
, disp
;
765 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
767 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
768 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
771 struct builtin_description
;
772 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
774 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
776 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
777 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
778 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
779 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
780 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
781 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
782 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
786 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
788 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
789 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
790 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
791 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
792 static unsigned int ix86_select_alt_pic_regnum
PARAMS ((void));
793 static int ix86_save_reg
PARAMS ((unsigned int, int));
794 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
795 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
796 static int ix86_fntype_regparm
PARAMS ((tree
));
797 const struct attribute_spec ix86_attribute_table
[];
798 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
799 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
800 static int ix86_value_regno
PARAMS ((enum machine_mode
));
802 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
803 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
806 /* Register class used for passing given 64bit part of the argument.
807 These represent classes as documented by the PS ABI, with the exception
808 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
814 enum x86_64_reg_class
817 X86_64_INTEGER_CLASS
,
818 X86_64_INTEGERSI_CLASS
,
/* Human-readable names for the x86_64_reg_class enumerators, used in
   debug output.  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
831 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
832 enum x86_64_reg_class
[MAX_CLASSES
],
834 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
836 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
838 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
839 enum x86_64_reg_class
));
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
/* NOTE(review): the #ifdef ASM_QUAD guard around the DI op was missing
   in this copy; restored per upstream convention -- verify.  */
#ifdef ASM_QUAD
# undef TARGET_ASM_ALIGNED_DI_OP
# define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

/* NOTE(review): the #ifdef HAVE_AS_TLS guard was missing in this copy;
   restored per upstream convention -- verify.  */
#ifdef HAVE_AS_TLS
# undef TARGET_HAVE_TLS
# define TARGET_HAVE_TLS true
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
909 struct gcc_target targetm
= TARGET_INITIALIZER
;
911 /* Sometimes certain combinations of command options do not make
912 sense on a particular target machine. You can define a macro
913 `OVERRIDE_OPTIONS' to take account of this. This macro, if
914 defined, is executed once just after all the command options have
917 Don't use this macro to turn on various extra optimizations for
918 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
924 /* Comes from final.c -- no real reason to change it. */
925 #define MAX_CODE_ALIGN 16
929 const struct processor_costs
*cost
; /* Processor costs */
930 const int target_enable
; /* Target flags to enable. */
931 const int target_disable
; /* Target flags to disable. */
932 const int align_loop
; /* Default alignments. */
933 const int align_loop_max_skip
;
934 const int align_jump
;
935 const int align_jump_max_skip
;
936 const int align_func
;
937 const int branch_cost
;
939 const processor_target_table
[PROCESSOR_max
] =
941 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4, 1},
942 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16, 1},
943 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16, 1},
944 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16, 1},
945 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32, 1},
946 {&athlon_cost
, 0, 0, 16, 7, 64, 7, 16, 1},
947 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0, 1}
950 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
953 const char *const name
; /* processor name or nickname. */
954 const enum processor_type processor
;
960 PTA_PREFETCH_SSE
= 8,
965 const processor_alias_table
[] =
967 {"i386", PROCESSOR_I386
, 0},
968 {"i486", PROCESSOR_I486
, 0},
969 {"i586", PROCESSOR_PENTIUM
, 0},
970 {"pentium", PROCESSOR_PENTIUM
, 0},
971 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
972 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
973 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
974 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
975 {"i686", PROCESSOR_PENTIUMPRO
, 0},
976 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
977 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
978 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
979 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
980 PTA_MMX
| PTA_PREFETCH_SSE
},
981 {"k6", PROCESSOR_K6
, PTA_MMX
},
982 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
983 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
984 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
986 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
987 | PTA_3DNOW
| PTA_3DNOW_A
},
988 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
989 | PTA_3DNOW_A
| PTA_SSE
},
990 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
991 | PTA_3DNOW_A
| PTA_SSE
},
992 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
993 | PTA_3DNOW_A
| PTA_SSE
},
996 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
998 /* By default our XFmode is the 80-bit extended format. If we have
999 use TFmode instead, it's also the 80-bit format, but with padding. */
1000 real_format_for_mode
[XFmode
- QFmode
] = &ieee_extended_intel_96_format
;
1001 real_format_for_mode
[TFmode
- QFmode
] = &ieee_extended_intel_128_format
;
1003 /* Set the default values for switches whose default depends on TARGET_64BIT
1004 in case they weren't overwriten by command line options. */
1007 if (flag_omit_frame_pointer
== 2)
1008 flag_omit_frame_pointer
= 1;
1009 if (flag_asynchronous_unwind_tables
== 2)
1010 flag_asynchronous_unwind_tables
= 1;
1011 if (flag_pcc_struct_return
== 2)
1012 flag_pcc_struct_return
= 0;
1016 if (flag_omit_frame_pointer
== 2)
1017 flag_omit_frame_pointer
= 0;
1018 if (flag_asynchronous_unwind_tables
== 2)
1019 flag_asynchronous_unwind_tables
= 0;
1020 if (flag_pcc_struct_return
== 2)
1021 flag_pcc_struct_return
= 1;
1024 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1025 SUBTARGET_OVERRIDE_OPTIONS
;
1028 if (!ix86_cpu_string
&& ix86_arch_string
)
1029 ix86_cpu_string
= ix86_arch_string
;
1030 if (!ix86_cpu_string
)
1031 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1032 if (!ix86_arch_string
)
1033 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
1035 if (ix86_cmodel_string
!= 0)
1037 if (!strcmp (ix86_cmodel_string
, "small"))
1038 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1040 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1041 else if (!strcmp (ix86_cmodel_string
, "32"))
1042 ix86_cmodel
= CM_32
;
1043 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1044 ix86_cmodel
= CM_KERNEL
;
1045 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1046 ix86_cmodel
= CM_MEDIUM
;
1047 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1048 ix86_cmodel
= CM_LARGE
;
1050 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1054 ix86_cmodel
= CM_32
;
1056 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1058 if (ix86_asm_string
!= 0)
1060 if (!strcmp (ix86_asm_string
, "intel"))
1061 ix86_asm_dialect
= ASM_INTEL
;
1062 else if (!strcmp (ix86_asm_string
, "att"))
1063 ix86_asm_dialect
= ASM_ATT
;
1065 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1067 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1068 error ("code model `%s' not supported in the %s bit mode",
1069 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1070 if (ix86_cmodel
== CM_LARGE
)
1071 sorry ("code model `large' not supported yet");
1072 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1073 sorry ("%i-bit mode not compiled in",
1074 (target_flags
& MASK_64BIT
) ? 64 : 32);
1076 for (i
= 0; i
< pta_size
; i
++)
1077 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1079 ix86_arch
= processor_alias_table
[i
].processor
;
1080 /* Default cpu tuning to the architecture. */
1081 ix86_cpu
= ix86_arch
;
1082 if (processor_alias_table
[i
].flags
& PTA_MMX
1083 && !(target_flags_explicit
& MASK_MMX
))
1084 target_flags
|= MASK_MMX
;
1085 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1086 && !(target_flags_explicit
& MASK_3DNOW
))
1087 target_flags
|= MASK_3DNOW
;
1088 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1089 && !(target_flags_explicit
& MASK_3DNOW_A
))
1090 target_flags
|= MASK_3DNOW_A
;
1091 if (processor_alias_table
[i
].flags
& PTA_SSE
1092 && !(target_flags_explicit
& MASK_SSE
))
1093 target_flags
|= MASK_SSE
;
1094 if (processor_alias_table
[i
].flags
& PTA_SSE2
1095 && !(target_flags_explicit
& MASK_SSE2
))
1096 target_flags
|= MASK_SSE2
;
1097 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1098 x86_prefetch_sse
= true;
1103 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1105 for (i
= 0; i
< pta_size
; i
++)
1106 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1108 ix86_cpu
= processor_alias_table
[i
].processor
;
1111 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1112 x86_prefetch_sse
= true;
1114 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1117 ix86_cost
= &size_cost
;
1119 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1120 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1121 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1123 /* Arrange to set up i386_stack_locals for all functions. */
1124 init_machine_status
= ix86_init_machine_status
;
1126 /* Validate -mregparm= value. */
1127 if (ix86_regparm_string
)
1129 i
= atoi (ix86_regparm_string
);
1130 if (i
< 0 || i
> REGPARM_MAX
)
1131 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1137 ix86_regparm
= REGPARM_MAX
;
1139 /* If the user has provided any of the -malign-* options,
1140 warn and use that value only if -falign-* is not set.
1141 Remove this code in GCC 3.2 or later. */
1142 if (ix86_align_loops_string
)
1144 warning ("-malign-loops is obsolete, use -falign-loops");
1145 if (align_loops
== 0)
1147 i
= atoi (ix86_align_loops_string
);
1148 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1149 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1151 align_loops
= 1 << i
;
1155 if (ix86_align_jumps_string
)
1157 warning ("-malign-jumps is obsolete, use -falign-jumps");
1158 if (align_jumps
== 0)
1160 i
= atoi (ix86_align_jumps_string
);
1161 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1162 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1164 align_jumps
= 1 << i
;
1168 if (ix86_align_funcs_string
)
1170 warning ("-malign-functions is obsolete, use -falign-functions");
1171 if (align_functions
== 0)
1173 i
= atoi (ix86_align_funcs_string
);
1174 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1175 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1177 align_functions
= 1 << i
;
1181 /* Default align_* from the processor table. */
1182 if (align_loops
== 0)
1184 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1185 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1187 if (align_jumps
== 0)
1189 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1190 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1192 if (align_functions
== 0)
1194 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1197 /* Validate -mpreferred-stack-boundary= value, or provide default.
1198 The default of 128 bits is for Pentium III's SSE __m128, but we
1199 don't want additional code to keep the stack aligned when
1200 optimizing for code size. */
1201 ix86_preferred_stack_boundary
= (optimize_size
1202 ? TARGET_64BIT
? 128 : 32
1204 if (ix86_preferred_stack_boundary_string
)
1206 i
= atoi (ix86_preferred_stack_boundary_string
);
1207 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1208 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1209 TARGET_64BIT
? 4 : 2);
1211 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1214 /* Validate -mbranch-cost= value, or provide default. */
1215 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1216 if (ix86_branch_cost_string
)
1218 i
= atoi (ix86_branch_cost_string
);
1220 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1222 ix86_branch_cost
= i
;
1225 if (ix86_tls_dialect_string
)
1227 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1228 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1229 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1230 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1232 error ("bad value (%s) for -mtls-dialect= switch",
1233 ix86_tls_dialect_string
);
1236 /* Keep nonleaf frame pointers. */
1237 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1238 flag_omit_frame_pointer
= 1;
1240 /* If we're doing fast math, we don't care about comparison order
1241 wrt NaNs. This lets us use a shorter comparison sequence. */
1242 if (flag_unsafe_math_optimizations
)
1243 target_flags
&= ~MASK_IEEE_FP
;
1245 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1246 since the insns won't need emulation. */
1247 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1248 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1252 if (TARGET_ALIGN_DOUBLE
)
1253 error ("-malign-double makes no sense in the 64bit mode");
1255 error ("-mrtd calling convention not supported in the 64bit mode");
1256 /* Enable by default the SSE and MMX builtins. */
1257 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1258 ix86_fpmath
= FPMATH_SSE
;
1261 ix86_fpmath
= FPMATH_387
;
1263 if (ix86_fpmath_string
!= 0)
1265 if (! strcmp (ix86_fpmath_string
, "387"))
1266 ix86_fpmath
= FPMATH_387
;
1267 else if (! strcmp (ix86_fpmath_string
, "sse"))
1271 warning ("SSE instruction set disabled, using 387 arithmetics");
1272 ix86_fpmath
= FPMATH_387
;
1275 ix86_fpmath
= FPMATH_SSE
;
1277 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1278 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1282 warning ("SSE instruction set disabled, using 387 arithmetics");
1283 ix86_fpmath
= FPMATH_387
;
1285 else if (!TARGET_80387
)
1287 warning ("387 instruction set disabled, using SSE arithmetics");
1288 ix86_fpmath
= FPMATH_SSE
;
1291 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1294 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1297 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1301 target_flags
|= MASK_MMX
;
1302 x86_prefetch_sse
= true;
1305 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1308 target_flags
|= MASK_MMX
;
1309 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1310 extensions it adds. */
1311 if (x86_3dnow_a
& (1 << ix86_arch
))
1312 target_flags
|= MASK_3DNOW_A
;
1314 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1315 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1317 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1319 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1322 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1323 p
= strchr (internal_label_prefix
, 'X');
1324 internal_label_prefix_len
= p
- internal_label_prefix
;
/* Set per-optimization-level option defaults for this backend.
   LEVEL is the -O level; SIZE is nonzero for -Os (unused here).
   NOTE(review): extraction appears to have dropped this function's
   declarator line, its braces and the #endif closing INSN_SCHEDULING;
   code left byte-identical, comments only added.  */
1330 optimization_options (level
, size
)
1332 int size ATTRIBUTE_UNUSED
;
1334 /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1335 make the problem with not enough registers even worse. */
1336 #ifdef INSN_SCHEDULING
1338 flag_schedule_insns
= 0;
1341 /* The default values of these switches depend on the TARGET_64BIT
1342 that is not known at this moment. Mark these values with 2 and
1343 let the user override these. In case there is no command line option
1344 specifying them, we will set the defaults in override_options. */
1346 flag_omit_frame_pointer
= 2;
1347 flag_pcc_struct_return
= 2;
1348 flag_asynchronous_unwind_tables
= 2;
/* NOTE(review): extraction appears to have dropped this table's opening
   brace, the #endif matching TARGET_DLLIMPORT_DECL_ATTRIBUTES, and the
   closing "};".  Code left byte-identical; comments only added.
   Each entry: { name, min_len, max_len, decl_req, type_req,
   fn_type_req, handler } -- terminated by the all-NULL sentinel row.  */
1351 /* Table of valid machine attributes. */
1352 const struct attribute_spec ix86_attribute_table
[] =
1354 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1355 /* Stdcall attribute says callee is responsible for popping arguments
1356 if they are not variable. */
1357 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1358 /* Cdecl attribute says the callee is a normal C declaration */
1359 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1360 /* Regparm attribute specifies how many integer arguments are to be
1361 passed in registers. */
1362 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1363 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1364 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1365 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1366 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
/* Sentinel entry marking the end of the table.  */
1368 { NULL
, 0, 0, false, false, false, NULL
}
/* NOTE(review): extraction appears to have dropped this handler's return
   type line, braces and NULL_TREE returns; code left byte-identical,
   comments only added.  On a non-function node the attribute is rejected
   with a warning and *no_add_attrs is set so it is not attached.  */
1371 /* Handle a "cdecl" or "stdcall" attribute;
1372 arguments as in struct attribute_spec.handler. */
1374 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1377 tree args ATTRIBUTE_UNUSED
;
1378 int flags ATTRIBUTE_UNUSED
;
/* Only function-ish nodes may carry cdecl/stdcall.  */
1381 if (TREE_CODE (*node
) != FUNCTION_TYPE
1382 && TREE_CODE (*node
) != METHOD_TYPE
1383 && TREE_CODE (*node
) != FIELD_DECL
1384 && TREE_CODE (*node
) != TYPE_DECL
)
1386 warning ("`%s' attribute only applies to functions",
1387 IDENTIFIER_POINTER (name
));
1388 *no_add_attrs
= true;
/* Presumably the other arm warns when the attribute is meaningless in
   the current mode -- TODO confirm against the full source (the guard
   condition between the two warnings is missing from this extract).  */
1393 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1394 *no_add_attrs
= true;
/* NOTE(review): extraction appears to have dropped this handler's return
   type, braces, the declaration of `cst' and the final return; code left
   byte-identical, comments only added.  Validates that regparm(N) is on
   a function-ish node, that N is an integer constant, and that N does
   not exceed REGPARM_MAX; otherwise warns and suppresses the attribute.  */
1400 /* Handle a "regparm" attribute;
1401 arguments as in struct attribute_spec.handler. */
1403 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1407 int flags ATTRIBUTE_UNUSED
;
/* Only function-ish nodes may carry regparm.  */
1410 if (TREE_CODE (*node
) != FUNCTION_TYPE
1411 && TREE_CODE (*node
) != METHOD_TYPE
1412 && TREE_CODE (*node
) != FIELD_DECL
1413 && TREE_CODE (*node
) != TYPE_DECL
)
1415 warning ("`%s' attribute only applies to functions",
1416 IDENTIFIER_POINTER (name
));
1417 *no_add_attrs
= true;
/* Validate the single argument: must be an INTEGER_CST <= REGPARM_MAX.  */
1423 cst
= TREE_VALUE (args
);
1424 if (TREE_CODE (cst
) != INTEGER_CST
)
1426 warning ("`%s' attribute requires an integer constant argument",
1427 IDENTIFIER_POINTER (name
));
1428 *no_add_attrs
= true;
1430 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1432 warning ("argument to `%s' attribute larger than %d",
1433 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1434 *no_add_attrs
= true;
/* NOTE(review): extraction appears to have dropped this function's
   return type line, braces and return statements; code left
   byte-identical, comments only added.  Compares whether TYPE1 and TYPE2
   agree on the non-default calling-convention attribute (cdecl vs
   stdcall, picked by whether -mrtd flips the default).  */
1441 /* Return 0 if the attributes for two types are incompatible, 1 if they
1442 are compatible, and 2 if they are nearly compatible (which causes a
1443 warning to be generated). */
1446 ix86_comp_type_attributes (type1
, type2
)
1450 /* Check for mismatch of non-default calling convention. */
1451 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
/* Non-function types trivially compare as compatible.  */
1453 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1456 /* Check for mismatched return types (cdecl vs stdcall). */
1457 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1458 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
/* NOTE(review): extraction appears to have dropped this function's
   declarator, braces and the `attr'/if lines; code left byte-identical,
   comments only added.  Returns the regparm count from TYPE's attribute
   when present, otherwise the global -mregparm default.  */
1463 /* Return the regparm value for a function with the indicated TYPE. */
1466 ix86_fntype_regparm (type
)
1471 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
/* regparm(N): N is the sole value in the attribute's argument list.  */
1473 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1475 return ix86_regparm
;
/* NOTE(review): extraction appears to have dropped this function's
   declarator line, braces and several returns (including the final
   "return 0"); code left byte-identical, comments only added.  */
1478 /* Value is the number of bytes of arguments automatically
1479 popped when returning from a subroutine call.
1480 FUNDECL is the declaration node of the function (as a tree),
1481 FUNTYPE is the data type of the function (as a tree),
1482 or for a library call it is an identifier node for the subroutine name.
1483 SIZE is the number of bytes of arguments passed on the stack.
1485 On the 80386, the RTD insn may be used to pop them if the number
1486 of args is fixed, but if the number is variable then the caller
1487 must pop them all. RTD can't be used for library calls now
1488 because the library is compiled with the Unix compiler.
1489 Use of RTD is a selectable option, since it is incompatible with
1490 standard Unix calling sequences. If the option is not selected,
1491 the caller must always pop the args.
1493 The attribute stdcall is equivalent to RTD on a per module basis. */
1496 ix86_return_pops_args (fundecl
, funtype
, size
)
/* -mrtd applies only to real function decls, not library-call
   identifier nodes.  */
1501 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1503 /* Cdecl functions override -mrtd, and never pop the stack. */
1504 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1506 /* Stdcall functions will pop the stack if not variable args. */
1507 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
/* Callee pops only when the arg list is fixed (ends in void).  */
1511 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1512 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1513 == void_type_node
)))
1517 /* Lose any fake structure return argument if it is passed on the stack. */
1518 if (aggregate_value_p (TREE_TYPE (funtype
))
1521 int nregs
= ix86_fntype_regparm (funtype
);
/* The hidden struct-return pointer occupies one word on the stack.  */
1524 return GET_MODE_SIZE (Pmode
);
/* NOTE(review): extraction appears to have dropped this predicate's
   declarator, braces, the TARGET_64BIT split between the two code paths
   and the returns; code left byte-identical, comments only added.  */
1530 /* Argument support functions. */
1532 /* Return true when register may be used to pass function parameters. */
1534 ix86_function_arg_regno_p (regno
)
/* 32-bit path: low integer regs up to REGPARM_MAX, plus usable SSE
   regs when SSE argument passing is enabled.  */
1539 return (regno
< REGPARM_MAX
1540 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
/* 64-bit path below: SSE regs, then scan the fixed 64-bit integer
   parameter register list.  */
1541 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1543 /* RAX is used as hidden argument to va_arg functions. */
1546 for (i
= 0; i
< REGPARM_MAX
; i
++)
1547 if (regno
== x86_64_int_parameter_registers
[i
])
/* NOTE(review): extraction appears to have dropped this function's
   declarator-name line, braces, the "*cum = zero_cum;" reset and several
   guard conditions; code left byte-identical, comments only added.  */
1552 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1553 for a call to a function whose data type is FNTYPE.
1554 For a library call, FNTYPE is 0. */
1557 init_cumulative_args (cum
, fntype
, libname
)
1558 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1559 tree fntype
; /* tree ptr for function decl */
1560 rtx libname
; /* SYMBOL_REF of library name or 0 */
1562 static CUMULATIVE_ARGS zero_cum
;
1563 tree param
, next_param
;
/* Optional debug trace of what is being initialized.  */
1565 if (TARGET_DEBUG_ARG
)
1567 fprintf (stderr
, "\ninit_cumulative_args (");
1569 fprintf (stderr
, "fntype code = %s, ret code = %s",
1570 tree_code_name
[(int) TREE_CODE (fntype
)],
1571 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1573 fprintf (stderr
, "no fntype");
1576 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1581 /* Set up the number of registers to use for passing arguments. */
1582 cum
->nregs
= ix86_regparm
;
1583 cum
->sse_nregs
= SSE_REGPARM_MAX
;
/* A regparm attribute on the function type overrides -mregparm
   (32-bit only).  */
1584 if (fntype
&& !TARGET_64BIT
)
1586 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1589 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1591 cum
->maybe_vaarg
= false;
1593 /* Determine if this function has variable arguments. This is
1594 indicated by the last argument being 'void_type_mode' if there
1595 are no variable arguments. If there are variable arguments, then
1596 we won't pass anything in registers */
/* Walk the arg-type list; a list not terminated by void_type_node
   means the function is varargs.  */
1600 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1601 param
!= 0; param
= next_param
)
1603 next_param
= TREE_CHAIN (param
);
1604 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1608 cum
->maybe_vaarg
= true;
/* Unprototyped calls and pure library calls may also be varargs.  */
1612 if ((!fntype
&& !libname
)
1613 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1614 cum
->maybe_vaarg
= 1;
1616 if (TARGET_DEBUG_ARG
)
1617 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1622 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1623 of this code is to classify each 8bytes of incoming argument by the register
1624 class and assign registers accordingly. */
/* NOTE(review): extraction appears to have dropped this function's
   opening/closing braces and the early "return class1;/class2;" lines
   for rules 1 and 2; code left byte-identical, comments only added.
   Implements the pairwise class-merge lattice used by the x86-64
   parameter classification (rules numbered per the psABI).  */
1626 /* Return the union class of CLASS1 and CLASS2.
1627 See the x86-64 PS ABI for details. */
1629 static enum x86_64_reg_class
1630 merge_classes (class1
, class2
)
1631 enum x86_64_reg_class class1
, class2
;
1633 /* Rule #1: If both classes are equal, this is the resulting class. */
1634 if (class1
== class2
)
1637 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1639 if (class1
== X86_64_NO_CLASS
)
1641 if (class2
== X86_64_NO_CLASS
)
1644 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1645 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1646 return X86_64_MEMORY_CLASS
;
1648 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI so the cheaper SImode move can be
   used (see the INTEGERSI_CLASS note earlier in this file).  */
1649 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1650 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1651 return X86_64_INTEGERSI_CLASS
;
1652 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1653 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1654 return X86_64_INTEGER_CLASS
;
1656 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1657 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1658 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1659 return X86_64_MEMORY_CLASS
;
1661 /* Rule #6: Otherwise class SSE is used. */
1662 return X86_64_SSE_CLASS
;
1665 /* Classify the argument of type TYPE and mode MODE.
1666 CLASSES will be filled by the register class used to pass each word
1667 of the operand. The number of words is returned. In case the parameter
1668 should be passed in memory, 0 is returned. As a special case for zero
1669 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1671 BIT_OFFSET is used internally for handling records and specifies offset
1672 of the offset in bits modulo 256 to avoid overflow cases.
1674 See the x86-64 PS ABI for details.
1678 classify_argument (mode
, type
, classes
, bit_offset
)
1679 enum machine_mode mode
;
1681 enum x86_64_reg_class classes
[MAX_CLASSES
];
1685 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1686 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1688 /* Variable sized entities are always passed/returned in memory. */
1692 if (type
&& AGGREGATE_TYPE_P (type
))
1696 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1698 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1702 for (i
= 0; i
< words
; i
++)
1703 classes
[i
] = X86_64_NO_CLASS
;
1705 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1706 signalize memory class, so handle it as special case. */
1709 classes
[0] = X86_64_NO_CLASS
;
1713 /* Classify each field of record and merge classes. */
1714 if (TREE_CODE (type
) == RECORD_TYPE
)
1716 /* For classes first merge in the field of the subclasses. */
1717 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1719 tree bases
= TYPE_BINFO_BASETYPES (type
);
1720 int n_bases
= TREE_VEC_LENGTH (bases
);
1723 for (i
= 0; i
< n_bases
; ++i
)
1725 tree binfo
= TREE_VEC_ELT (bases
, i
);
1727 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1728 tree type
= BINFO_TYPE (binfo
);
1730 num
= classify_argument (TYPE_MODE (type
),
1732 (offset
+ bit_offset
) % 256);
1735 for (i
= 0; i
< num
; i
++)
1737 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1739 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1743 /* And now merge the fields of structure. */
1744 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1746 if (TREE_CODE (field
) == FIELD_DECL
)
1750 /* Bitfields are always classified as integer. Handle them
1751 early, since later code would consider them to be
1752 misaligned integers. */
1753 if (DECL_BIT_FIELD (field
))
1755 for (i
= int_bit_position (field
) / 8 / 8;
1756 i
< (int_bit_position (field
)
1757 + tree_low_cst (DECL_SIZE (field
), 0)
1760 merge_classes (X86_64_INTEGER_CLASS
,
1765 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1766 TREE_TYPE (field
), subclasses
,
1767 (int_bit_position (field
)
1768 + bit_offset
) % 256);
1771 for (i
= 0; i
< num
; i
++)
1774 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
1776 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1782 /* Arrays are handled as small records. */
1783 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1786 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1787 TREE_TYPE (type
), subclasses
, bit_offset
);
1791 /* The partial classes are now full classes. */
1792 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1793 subclasses
[0] = X86_64_SSE_CLASS
;
1794 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1795 subclasses
[0] = X86_64_INTEGER_CLASS
;
1797 for (i
= 0; i
< words
; i
++)
1798 classes
[i
] = subclasses
[i
% num
];
1800 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1801 else if (TREE_CODE (type
) == UNION_TYPE
1802 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
1804 /* For classes first merge in the field of the subclasses. */
1805 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1807 tree bases
= TYPE_BINFO_BASETYPES (type
);
1808 int n_bases
= TREE_VEC_LENGTH (bases
);
1811 for (i
= 0; i
< n_bases
; ++i
)
1813 tree binfo
= TREE_VEC_ELT (bases
, i
);
1815 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1816 tree type
= BINFO_TYPE (binfo
);
1818 num
= classify_argument (TYPE_MODE (type
),
1820 (offset
+ (bit_offset
% 64)) % 256);
1823 for (i
= 0; i
< num
; i
++)
1825 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1827 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1831 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1833 if (TREE_CODE (field
) == FIELD_DECL
)
1836 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1837 TREE_TYPE (field
), subclasses
,
1841 for (i
= 0; i
< num
; i
++)
1842 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1849 /* Final merger cleanup. */
1850 for (i
= 0; i
< words
; i
++)
1852 /* If one class is MEMORY, everything should be passed in
1854 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1857 /* The X86_64_SSEUP_CLASS should be always preceded by
1858 X86_64_SSE_CLASS. */
1859 if (classes
[i
] == X86_64_SSEUP_CLASS
1860 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1861 classes
[i
] = X86_64_SSE_CLASS
;
1863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1864 if (classes
[i
] == X86_64_X87UP_CLASS
1865 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1866 classes
[i
] = X86_64_SSE_CLASS
;
1871 /* Compute alignment needed. We align all types to natural boundaries with
1872 exception of XFmode that is aligned to 64bits. */
1873 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1875 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1878 mode_alignment
= 128;
1879 else if (mode
== XCmode
)
1880 mode_alignment
= 256;
1881 /* Misaligned fields are always returned in memory. */
1882 if (bit_offset
% mode_alignment
)
1886 /* Classification of atomic types. */
1896 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1897 classes
[0] = X86_64_INTEGERSI_CLASS
;
1899 classes
[0] = X86_64_INTEGER_CLASS
;
1903 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1906 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1907 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1910 if (!(bit_offset
% 64))
1911 classes
[0] = X86_64_SSESF_CLASS
;
1913 classes
[0] = X86_64_SSE_CLASS
;
1916 classes
[0] = X86_64_SSEDF_CLASS
;
1919 classes
[0] = X86_64_X87_CLASS
;
1920 classes
[1] = X86_64_X87UP_CLASS
;
1923 classes
[0] = X86_64_X87_CLASS
;
1924 classes
[1] = X86_64_X87UP_CLASS
;
1925 classes
[2] = X86_64_X87_CLASS
;
1926 classes
[3] = X86_64_X87UP_CLASS
;
1929 classes
[0] = X86_64_SSEDF_CLASS
;
1930 classes
[1] = X86_64_SSEDF_CLASS
;
1933 classes
[0] = X86_64_SSE_CLASS
;
1941 classes
[0] = X86_64_SSE_CLASS
;
1942 classes
[1] = X86_64_SSEUP_CLASS
;
/* NOTE(review): extraction appears to have dropped this function's
   return type, braces, the zeroing of *int_nregs/*sse_nregs, the switch
   head and the per-case increment/return bodies; code left
   byte-identical, comments only added.  Counts how many integer and SSE
   registers the classified argument needs; MEMORY/X87 classes force the
   in-memory (return 0) path per the surrounding comment.  */
1957 /* Examine the argument and return set number of register required in each
1958 class. Return 0 iff parameter should be passed in memory. */
1960 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1961 enum machine_mode mode
;
1963 int *int_nregs
, *sse_nregs
;
1966 enum x86_64_reg_class
class[MAX_CLASSES
];
1967 int n
= classify_argument (mode
, type
, class, 0);
/* Tally each 8-byte word's class into the two register counters.  */
1973 for (n
--; n
>= 0; n
--)
1976 case X86_64_INTEGER_CLASS
:
1977 case X86_64_INTEGERSI_CLASS
:
1980 case X86_64_SSE_CLASS
:
1981 case X86_64_SSESF_CLASS
:
1982 case X86_64_SSEDF_CLASS
:
1985 case X86_64_NO_CLASS
:
1986 case X86_64_SSEUP_CLASS
:
1988 case X86_64_X87_CLASS
:
1989 case X86_64_X87UP_CLASS
:
1993 case X86_64_MEMORY_CLASS
:
1998 /* Construct container for the argument used by GCC interface. See
1999 FUNCTION_ARG for the detailed description. */
2001 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
2002 enum machine_mode mode
;
2005 int nintregs
, nsseregs
;
2009 enum machine_mode tmpmode
;
2011 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2012 enum x86_64_reg_class
class[MAX_CLASSES
];
2016 int needed_sseregs
, needed_intregs
;
2017 rtx exp
[MAX_CLASSES
];
2020 n
= classify_argument (mode
, type
, class, 0);
2021 if (TARGET_DEBUG_ARG
)
2024 fprintf (stderr
, "Memory class\n");
2027 fprintf (stderr
, "Classes:");
2028 for (i
= 0; i
< n
; i
++)
2030 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2032 fprintf (stderr
, "\n");
2037 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2039 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2042 /* First construct simple cases. Avoid SCmode, since we want to use
2043 single register to pass this type. */
2044 if (n
== 1 && mode
!= SCmode
)
2047 case X86_64_INTEGER_CLASS
:
2048 case X86_64_INTEGERSI_CLASS
:
2049 return gen_rtx_REG (mode
, intreg
[0]);
2050 case X86_64_SSE_CLASS
:
2051 case X86_64_SSESF_CLASS
:
2052 case X86_64_SSEDF_CLASS
:
2053 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2054 case X86_64_X87_CLASS
:
2055 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2056 case X86_64_NO_CLASS
:
2057 /* Zero sized array, struct or class. */
2062 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
2063 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2065 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2066 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
2067 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2068 && class[1] == X86_64_INTEGER_CLASS
2069 && (mode
== CDImode
|| mode
== TImode
)
2070 && intreg
[0] + 1 == intreg
[1])
2071 return gen_rtx_REG (mode
, intreg
[0]);
2073 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2074 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
2075 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
2077 /* Otherwise figure out the entries of the PARALLEL. */
2078 for (i
= 0; i
< n
; i
++)
2082 case X86_64_NO_CLASS
:
2084 case X86_64_INTEGER_CLASS
:
2085 case X86_64_INTEGERSI_CLASS
:
2086 /* Merge TImodes on aligned occassions here too. */
2087 if (i
* 8 + 8 > bytes
)
2088 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2089 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2093 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2094 if (tmpmode
== BLKmode
)
2096 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2097 gen_rtx_REG (tmpmode
, *intreg
),
2101 case X86_64_SSESF_CLASS
:
2102 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2103 gen_rtx_REG (SFmode
,
2104 SSE_REGNO (sse_regno
)),
2108 case X86_64_SSEDF_CLASS
:
2109 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2110 gen_rtx_REG (DFmode
,
2111 SSE_REGNO (sse_regno
)),
2115 case X86_64_SSE_CLASS
:
2116 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2120 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2121 gen_rtx_REG (tmpmode
,
2122 SSE_REGNO (sse_regno
)),
2124 if (tmpmode
== TImode
)
2132 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2133 for (i
= 0; i
< nexps
; i
++)
2134 XVECEXP (ret
, 0, i
) = exp
[i
];
2138 /* Update the data in CUM to advance over an argument
2139 of mode MODE and data type TYPE.
2140 (TYPE is null for libcalls where that information may not be available.) */
2143 function_arg_advance (cum
, mode
, type
, named
)
2144 CUMULATIVE_ARGS
*cum
; /* current arg information */
2145 enum machine_mode mode
; /* current arg mode */
2146 tree type
; /* type of the argument or 0 if lib support */
2147 int named
; /* whether or not the argument was named */
2150 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2151 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2153 if (TARGET_DEBUG_ARG
)
2155 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2156 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2159 int int_nregs
, sse_nregs
;
2160 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2161 cum
->words
+= words
;
2162 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2164 cum
->nregs
-= int_nregs
;
2165 cum
->sse_nregs
-= sse_nregs
;
2166 cum
->regno
+= int_nregs
;
2167 cum
->sse_regno
+= sse_nregs
;
2170 cum
->words
+= words
;
2174 if (TARGET_SSE
&& mode
== TImode
)
2176 cum
->sse_words
+= words
;
2177 cum
->sse_nregs
-= 1;
2178 cum
->sse_regno
+= 1;
2179 if (cum
->sse_nregs
<= 0)
2187 cum
->words
+= words
;
2188 cum
->nregs
-= words
;
2189 cum
->regno
+= words
;
2191 if (cum
->nregs
<= 0)
2201 /* Define where to put the arguments to a function.
2202 Value is zero to push the argument on the stack,
2203 or a hard register in which to store the argument.
2205 MODE is the argument's machine mode.
2206 TYPE is the data type of the argument (as a tree).
2207 This is null for libcalls where that information may
2209 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2210 the preceding args and about the function being called.
2211 NAMED is nonzero if this argument is a named parameter
2212 (otherwise it is an extra parameter matching an ellipsis). */
2215 function_arg (cum
, mode
, type
, named
)
2216 CUMULATIVE_ARGS
*cum
; /* current arg information */
2217 enum machine_mode mode
; /* current arg mode */
2218 tree type
; /* type of the argument or 0 if lib support */
2219 int named
; /* != 0 for normal args, == 0 for ... args */
2223 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2224 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2226 /* Handle an hidden AL argument containing number of registers for varargs
2227 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2229 if (mode
== VOIDmode
)
2232 return GEN_INT (cum
->maybe_vaarg
2233 ? (cum
->sse_nregs
< 0
2241 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2242 &x86_64_int_parameter_registers
[cum
->regno
],
2247 /* For now, pass fp/complex values on the stack. */
2256 if (words
<= cum
->nregs
)
2257 ret
= gen_rtx_REG (mode
, cum
->regno
);
2261 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2265 if (TARGET_DEBUG_ARG
)
2268 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2269 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2272 print_simple_rtl (stderr
, ret
);
2274 fprintf (stderr
, ", stack");
2276 fprintf (stderr
, " )\n");
2282 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2286 ix86_function_arg_boundary (mode
, type
)
2287 enum machine_mode mode
;
2292 return PARM_BOUNDARY
;
2294 align
= TYPE_ALIGN (type
);
2296 align
= GET_MODE_ALIGNMENT (mode
);
2297 if (align
< PARM_BOUNDARY
)
2298 align
= PARM_BOUNDARY
;
2304 /* Return true if N is a possible register number of function value. */
2306 ix86_function_value_regno_p (regno
)
2311 return ((regno
) == 0
2312 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2313 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2315 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2316 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2317 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2320 /* Define how to find the value returned by a function.
2321 VALTYPE is the data type of the value (as a tree).
2322 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2323 otherwise, FUNC is 0. */
2325 ix86_function_value (valtype
)
2330 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2331 REGPARM_MAX
, SSE_REGPARM_MAX
,
2332 x86_64_int_return_registers
, 0);
2333 /* For zero sized structures, construct_continer return NULL, but we need
2334 to keep rest of compiler happy by returning meaningfull value. */
2336 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2340 return gen_rtx_REG (TYPE_MODE (valtype
),
2341 ix86_value_regno (TYPE_MODE (valtype
)));
2344 /* Return false iff type is returned in memory. */
2346 ix86_return_in_memory (type
)
2349 int needed_intregs
, needed_sseregs
;
2352 return !examine_argument (TYPE_MODE (type
), type
, 1,
2353 &needed_intregs
, &needed_sseregs
);
2357 if (TYPE_MODE (type
) == BLKmode
2358 || (VECTOR_MODE_P (TYPE_MODE (type
))
2359 && int_size_in_bytes (type
) == 8)
2360 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2361 && TYPE_MODE (type
) != TFmode
2362 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2368 /* Define how to find the value returned by a library function
2369 assuming the value has mode MODE. */
2371 ix86_libcall_value (mode
)
2372 enum machine_mode mode
;
2382 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2385 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2387 return gen_rtx_REG (mode
, 0);
2391 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2394 /* Given a mode, return the register to use for a return value. */
2397 ix86_value_regno (mode
)
2398 enum machine_mode mode
;
2400 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2401 return FIRST_FLOAT_REG
;
2402 if (mode
== TImode
|| VECTOR_MODE_P (mode
))
2403 return FIRST_SSE_REG
;
2407 /* Create the va_list data type. */
2410 ix86_build_va_list ()
2412 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2414 /* For i386 we use plain pointer to argument area. */
2416 return build_pointer_type (char_type_node
);
2418 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2419 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2421 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2422 unsigned_type_node
);
2423 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2424 unsigned_type_node
);
2425 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2427 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2430 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2431 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2432 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2433 DECL_FIELD_CONTEXT (f_sav
) = record
;
2435 TREE_CHAIN (record
) = type_decl
;
2436 TYPE_NAME (record
) = type_decl
;
2437 TYPE_FIELDS (record
) = f_gpr
;
2438 TREE_CHAIN (f_gpr
) = f_fpr
;
2439 TREE_CHAIN (f_fpr
) = f_ovf
;
2440 TREE_CHAIN (f_ovf
) = f_sav
;
2442 layout_type (record
);
2444 /* The correct type is an array type of one element. */
2445 return build_array_type (record
, build_index_type (size_zero_node
));
2448 /* Perform any needed actions needed for a function that is receiving a
2449 variable number of arguments.
2453 MODE and TYPE are the mode and type of the current parameter.
2455 PRETEND_SIZE is a variable that should be set to the amount of stack
2456 that must be pushed by the prolog to pretend that our caller pushed
2459 Normally, this macro will push all remaining incoming registers on the
2460 stack and set PRETEND_SIZE to the length of the registers pushed. */
2463 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2464 CUMULATIVE_ARGS
*cum
;
2465 enum machine_mode mode
;
2467 int *pretend_size ATTRIBUTE_UNUSED
;
2471 CUMULATIVE_ARGS next_cum
;
2472 rtx save_area
= NULL_RTX
, mem
;
2485 /* Indicate to allocate space on the stack for varargs save area. */
2486 ix86_save_varrargs_registers
= 1;
2488 fntype
= TREE_TYPE (current_function_decl
);
2489 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2490 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2491 != void_type_node
));
2493 /* For varargs, we do not want to skip the dummy va_dcl argument.
2494 For stdargs, we do want to skip the last named argument. */
2497 function_arg_advance (&next_cum
, mode
, type
, 1);
2500 save_area
= frame_pointer_rtx
;
2502 set
= get_varargs_alias_set ();
2504 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2506 mem
= gen_rtx_MEM (Pmode
,
2507 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2508 set_mem_alias_set (mem
, set
);
2509 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2510 x86_64_int_parameter_registers
[i
]));
2513 if (next_cum
.sse_nregs
)
2515 /* Now emit code to save SSE registers. The AX parameter contains number
2516 of SSE parameter regsiters used to call this function. We use
2517 sse_prologue_save insn template that produces computed jump across
2518 SSE saves. We need some preparation work to get this working. */
2520 label
= gen_label_rtx ();
2521 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2523 /* Compute address to jump to :
2524 label - 5*eax + nnamed_sse_arguments*5 */
2525 tmp_reg
= gen_reg_rtx (Pmode
);
2526 nsse_reg
= gen_reg_rtx (Pmode
);
2527 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2528 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2529 gen_rtx_MULT (Pmode
, nsse_reg
,
2531 if (next_cum
.sse_regno
)
2534 gen_rtx_CONST (DImode
,
2535 gen_rtx_PLUS (DImode
,
2537 GEN_INT (next_cum
.sse_regno
* 4))));
2539 emit_move_insn (nsse_reg
, label_ref
);
2540 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2542 /* Compute address of memory block we save into. We always use pointer
2543 pointing 127 bytes after first byte to store - this is needed to keep
2544 instruction size limited by 4 bytes. */
2545 tmp_reg
= gen_reg_rtx (Pmode
);
2546 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2547 plus_constant (save_area
,
2548 8 * REGPARM_MAX
+ 127)));
2549 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2550 set_mem_alias_set (mem
, set
);
2551 set_mem_align (mem
, BITS_PER_WORD
);
2553 /* And finally do the dirty job! */
2554 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2555 GEN_INT (next_cum
.sse_regno
), label
));
2560 /* Implement va_start. */
2563 ix86_va_start (valist
, nextarg
)
2567 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2568 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2569 tree gpr
, fpr
, ovf
, sav
, t
;
2571 /* Only 64bit target needs something special. */
2574 std_expand_builtin_va_start (valist
, nextarg
);
2578 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2579 f_fpr
= TREE_CHAIN (f_gpr
);
2580 f_ovf
= TREE_CHAIN (f_fpr
);
2581 f_sav
= TREE_CHAIN (f_ovf
);
2583 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2584 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2585 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2586 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2587 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2589 /* Count number of gp and fp argument registers used. */
2590 words
= current_function_args_info
.words
;
2591 n_gpr
= current_function_args_info
.regno
;
2592 n_fpr
= current_function_args_info
.sse_regno
;
2594 if (TARGET_DEBUG_ARG
)
2595 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2596 (int) words
, (int) n_gpr
, (int) n_fpr
);
2598 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2599 build_int_2 (n_gpr
* 8, 0));
2600 TREE_SIDE_EFFECTS (t
) = 1;
2601 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2603 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2604 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2605 TREE_SIDE_EFFECTS (t
) = 1;
2606 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2608 /* Find the overflow area. */
2609 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2611 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2612 build_int_2 (words
* UNITS_PER_WORD
, 0));
2613 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2614 TREE_SIDE_EFFECTS (t
) = 1;
2615 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2617 /* Find the register save area.
2618 Prologue of the function save it right above stack frame. */
2619 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2620 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2621 TREE_SIDE_EFFECTS (t
) = 1;
2622 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2625 /* Implement va_arg. */
2627 ix86_va_arg (valist
, type
)
2630 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2631 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2632 tree gpr
, fpr
, ovf
, sav
, t
;
2634 rtx lab_false
, lab_over
= NULL_RTX
;
2638 /* Only 64bit target needs something special. */
2641 return std_expand_builtin_va_arg (valist
, type
);
2644 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2645 f_fpr
= TREE_CHAIN (f_gpr
);
2646 f_ovf
= TREE_CHAIN (f_fpr
);
2647 f_sav
= TREE_CHAIN (f_ovf
);
2649 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2650 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2651 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2652 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2653 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2655 size
= int_size_in_bytes (type
);
2656 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2658 container
= construct_container (TYPE_MODE (type
), type
, 0,
2659 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2661 * Pull the value out of the saved registers ...
2664 addr_rtx
= gen_reg_rtx (Pmode
);
2668 rtx int_addr_rtx
, sse_addr_rtx
;
2669 int needed_intregs
, needed_sseregs
;
2672 lab_over
= gen_label_rtx ();
2673 lab_false
= gen_label_rtx ();
2675 examine_argument (TYPE_MODE (type
), type
, 0,
2676 &needed_intregs
, &needed_sseregs
);
2679 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2680 || TYPE_ALIGN (type
) > 128);
2682 /* In case we are passing structure, verify that it is consetuctive block
2683 on the register save area. If not we need to do moves. */
2684 if (!need_temp
&& !REG_P (container
))
2686 /* Verify that all registers are strictly consetuctive */
2687 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2691 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2693 rtx slot
= XVECEXP (container
, 0, i
);
2694 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2695 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2703 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2705 rtx slot
= XVECEXP (container
, 0, i
);
2706 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2707 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2714 int_addr_rtx
= addr_rtx
;
2715 sse_addr_rtx
= addr_rtx
;
2719 int_addr_rtx
= gen_reg_rtx (Pmode
);
2720 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2722 /* First ensure that we fit completely in registers. */
2725 emit_cmp_and_jump_insns (expand_expr
2726 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2727 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2728 1) * 8), GE
, const1_rtx
, SImode
,
2733 emit_cmp_and_jump_insns (expand_expr
2734 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2735 GEN_INT ((SSE_REGPARM_MAX
-
2736 needed_sseregs
+ 1) * 16 +
2737 REGPARM_MAX
* 8), GE
, const1_rtx
,
2738 SImode
, 1, lab_false
);
2741 /* Compute index to start of area used for integer regs. */
2744 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2745 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2746 if (r
!= int_addr_rtx
)
2747 emit_move_insn (int_addr_rtx
, r
);
2751 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2752 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2753 if (r
!= sse_addr_rtx
)
2754 emit_move_insn (sse_addr_rtx
, r
);
2761 /* Never use the memory itself, as it has the alias set. */
2762 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2763 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2764 set_mem_alias_set (mem
, get_varargs_alias_set ());
2765 set_mem_align (mem
, BITS_PER_UNIT
);
2767 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2769 rtx slot
= XVECEXP (container
, 0, i
);
2770 rtx reg
= XEXP (slot
, 0);
2771 enum machine_mode mode
= GET_MODE (reg
);
2777 if (SSE_REGNO_P (REGNO (reg
)))
2779 src_addr
= sse_addr_rtx
;
2780 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2784 src_addr
= int_addr_rtx
;
2785 src_offset
= REGNO (reg
) * 8;
2787 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2788 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2789 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2790 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2791 emit_move_insn (dest_mem
, src_mem
);
2798 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2799 build_int_2 (needed_intregs
* 8, 0));
2800 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2801 TREE_SIDE_EFFECTS (t
) = 1;
2802 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2807 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2808 build_int_2 (needed_sseregs
* 16, 0));
2809 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2810 TREE_SIDE_EFFECTS (t
) = 1;
2811 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2814 emit_jump_insn (gen_jump (lab_over
));
2816 emit_label (lab_false
);
2819 /* ... otherwise out of the overflow area. */
2821 /* Care for on-stack alignment if needed. */
2822 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2826 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2827 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2828 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2832 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2834 emit_move_insn (addr_rtx
, r
);
2837 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2838 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2839 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2840 TREE_SIDE_EFFECTS (t
) = 1;
2841 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2844 emit_label (lab_over
);
2849 /* Return nonzero if OP is either a i387 or SSE fp register. */
2851 any_fp_register_operand (op
, mode
)
2853 enum machine_mode mode ATTRIBUTE_UNUSED
;
2855 return ANY_FP_REG_P (op
);
2858 /* Return nonzero if OP is an i387 fp register. */
2860 fp_register_operand (op
, mode
)
2862 enum machine_mode mode ATTRIBUTE_UNUSED
;
2864 return FP_REG_P (op
);
2867 /* Return nonzero if OP is a non-fp register_operand. */
2869 register_and_not_any_fp_reg_operand (op
, mode
)
2871 enum machine_mode mode
;
2873 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
2876 /* Return nonzero of OP is a register operand other than an
2877 i387 fp register. */
2879 register_and_not_fp_reg_operand (op
, mode
)
2881 enum machine_mode mode
;
2883 return register_operand (op
, mode
) && !FP_REG_P (op
);
2886 /* Return nonzero if OP is general operand representable on x86_64. */
2889 x86_64_general_operand (op
, mode
)
2891 enum machine_mode mode
;
2894 return general_operand (op
, mode
);
2895 if (nonimmediate_operand (op
, mode
))
2897 return x86_64_sign_extended_value (op
, 1);
2900 /* Return nonzero if OP is general operand representable on x86_64
2901 as either sign extended or zero extended constant. */
2904 x86_64_szext_general_operand (op
, mode
)
2906 enum machine_mode mode
;
2909 return general_operand (op
, mode
);
2910 if (nonimmediate_operand (op
, mode
))
2912 return x86_64_sign_extended_value (op
, 1) || x86_64_zero_extended_value (op
);
2915 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2918 x86_64_nonmemory_operand (op
, mode
)
2920 enum machine_mode mode
;
2923 return nonmemory_operand (op
, mode
);
2924 if (register_operand (op
, mode
))
2926 return x86_64_sign_extended_value (op
, 1);
2929 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2932 x86_64_movabs_operand (op
, mode
)
2934 enum machine_mode mode
;
2936 if (!TARGET_64BIT
|| !flag_pic
)
2937 return nonmemory_operand (op
, mode
);
2938 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
, 0))
2940 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2945 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2948 x86_64_szext_nonmemory_operand (op
, mode
)
2950 enum machine_mode mode
;
2953 return nonmemory_operand (op
, mode
);
2954 if (register_operand (op
, mode
))
2956 return x86_64_sign_extended_value (op
, 0) || x86_64_zero_extended_value (op
);
2959 /* Return nonzero if OP is immediate operand representable on x86_64. */
2962 x86_64_immediate_operand (op
, mode
)
2964 enum machine_mode mode
;
2967 return immediate_operand (op
, mode
);
2968 return x86_64_sign_extended_value (op
, 0);
2971 /* Return nonzero if OP is immediate operand representable on x86_64. */
2974 x86_64_zext_immediate_operand (op
, mode
)
2976 enum machine_mode mode ATTRIBUTE_UNUSED
;
2978 return x86_64_zero_extended_value (op
);
2981 /* Return nonzero if OP is (const_int 1), else return zero. */
2984 const_int_1_operand (op
, mode
)
2986 enum machine_mode mode ATTRIBUTE_UNUSED
;
2988 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2991 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2992 for shift & compare patterns, as shifting by 0 does not change flags),
2993 else return zero. */
2996 const_int_1_31_operand (op
, mode
)
2998 enum machine_mode mode ATTRIBUTE_UNUSED
;
3000 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3003 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3004 reference and a constant. */
3007 symbolic_operand (op
, mode
)
3009 enum machine_mode mode ATTRIBUTE_UNUSED
;
3011 switch (GET_CODE (op
))
3019 if (GET_CODE (op
) == SYMBOL_REF
3020 || GET_CODE (op
) == LABEL_REF
3021 || (GET_CODE (op
) == UNSPEC
3022 && (XINT (op
, 1) == UNSPEC_GOT
3023 || XINT (op
, 1) == UNSPEC_GOTOFF
3024 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3026 if (GET_CODE (op
) != PLUS
3027 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3031 if (GET_CODE (op
) == SYMBOL_REF
3032 || GET_CODE (op
) == LABEL_REF
)
3034 /* Only @GOTOFF gets offsets. */
3035 if (GET_CODE (op
) != UNSPEC
3036 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3039 op
= XVECEXP (op
, 0, 0);
3040 if (GET_CODE (op
) == SYMBOL_REF
3041 || GET_CODE (op
) == LABEL_REF
)
3050 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3053 pic_symbolic_operand (op
, mode
)
3055 enum machine_mode mode ATTRIBUTE_UNUSED
;
3057 if (GET_CODE (op
) != CONST
)
3062 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
3067 if (GET_CODE (op
) == UNSPEC
)
3069 if (GET_CODE (op
) != PLUS
3070 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3073 if (GET_CODE (op
) == UNSPEC
)
3079 /* Return true if OP is a symbolic operand that resolves locally. */
3082 local_symbolic_operand (op
, mode
)
3084 enum machine_mode mode ATTRIBUTE_UNUSED
;
3086 if (GET_CODE (op
) == CONST
3087 && GET_CODE (XEXP (op
, 0)) == PLUS
3088 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3089 && (ix86_cmodel
!= CM_SMALL_PIC
3090 || (INTVAL (XEXP (XEXP (op
, 0), 1)) >= -16*1024*1024
3091 && INTVAL (XEXP (XEXP (op
, 0), 1)) < 16*1024*1024)))
3092 op
= XEXP (XEXP (op
, 0), 0);
3094 if (GET_CODE (op
) == LABEL_REF
)
3097 if (GET_CODE (op
) != SYMBOL_REF
)
3100 /* These we've been told are local by varasm and encode_section_info
3102 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
3105 /* There is, however, a not insubstantial body of code in the rest of
3106 the compiler that assumes it can just stick the results of
3107 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3108 /* ??? This is a hack. Should update the body of the compiler to
3109 always create a DECL an invoke targetm.encode_section_info. */
3110 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3111 internal_label_prefix_len
) == 0)
3117 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3120 tls_symbolic_operand (op
, mode
)
3122 enum machine_mode mode ATTRIBUTE_UNUSED
;
3124 const char *symbol_str
;
3126 if (GET_CODE (op
) != SYMBOL_REF
)
3128 symbol_str
= XSTR (op
, 0);
3130 if (symbol_str
[0] != '%')
3132 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
3136 tls_symbolic_operand_1 (op
, kind
)
3138 enum tls_model kind
;
3140 const char *symbol_str
;
3142 if (GET_CODE (op
) != SYMBOL_REF
)
3144 symbol_str
= XSTR (op
, 0);
3146 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
3150 global_dynamic_symbolic_operand (op
, mode
)
3152 enum machine_mode mode ATTRIBUTE_UNUSED
;
3154 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3158 local_dynamic_symbolic_operand (op
, mode
)
3160 enum machine_mode mode ATTRIBUTE_UNUSED
;
3162 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3166 initial_exec_symbolic_operand (op
, mode
)
3168 enum machine_mode mode ATTRIBUTE_UNUSED
;
3170 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3174 local_exec_symbolic_operand (op
, mode
)
3176 enum machine_mode mode ATTRIBUTE_UNUSED
;
3178 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3181 /* Test for a valid operand for a call instruction. Don't allow the
3182 arg pointer register or virtual regs since they may decay into
3183 reg + const, which the patterns can't handle. */
3186 call_insn_operand (op
, mode
)
3188 enum machine_mode mode ATTRIBUTE_UNUSED
;
3190 /* Disallow indirect through a virtual register. This leads to
3191 compiler aborts when trying to eliminate them. */
3192 if (GET_CODE (op
) == REG
3193 && (op
== arg_pointer_rtx
3194 || op
== frame_pointer_rtx
3195 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3196 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3199 /* Disallow `call 1234'. Due to varying assembler lameness this
3200 gets either rejected or translated to `call .+1234'. */
3201 if (GET_CODE (op
) == CONST_INT
)
3204 /* Explicitly allow SYMBOL_REF even if pic. */
3205 if (GET_CODE (op
) == SYMBOL_REF
)
3208 /* Otherwise we can allow any general_operand in the address. */
3209 return general_operand (op
, Pmode
);
3213 constant_call_address_operand (op
, mode
)
3215 enum machine_mode mode ATTRIBUTE_UNUSED
;
3217 if (GET_CODE (op
) == CONST
3218 && GET_CODE (XEXP (op
, 0)) == PLUS
3219 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3220 op
= XEXP (XEXP (op
, 0), 0);
3221 return GET_CODE (op
) == SYMBOL_REF
;
3224 /* Match exactly zero and one. */
3227 const0_operand (op
, mode
)
3229 enum machine_mode mode
;
3231 return op
== CONST0_RTX (mode
);
3235 const1_operand (op
, mode
)
3237 enum machine_mode mode ATTRIBUTE_UNUSED
;
3239 return op
== const1_rtx
;
3242 /* Match 2, 4, or 8. Used for leal multiplicands. */
3245 const248_operand (op
, mode
)
3247 enum machine_mode mode ATTRIBUTE_UNUSED
;
3249 return (GET_CODE (op
) == CONST_INT
3250 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3253 /* True if this is a constant appropriate for an increment or decremenmt. */
3256 incdec_operand (op
, mode
)
3258 enum machine_mode mode ATTRIBUTE_UNUSED
;
3260 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3261 registers, since carry flag is not set. */
3262 if (TARGET_PENTIUM4
&& !optimize_size
)
3264 return op
== const1_rtx
|| op
== constm1_rtx
;
3267 /* Return nonzero if OP is acceptable as operand of DImode shift
3271 shiftdi_operand (op
, mode
)
3273 enum machine_mode mode ATTRIBUTE_UNUSED
;
3276 return nonimmediate_operand (op
, mode
);
3278 return register_operand (op
, mode
);
3281 /* Return false if this is the stack pointer, or any other fake
3282 register eliminable to the stack pointer. Otherwise, this is
3285 This is used to prevent esp from being used as an index reg.
3286 Which would only happen in pathological cases. */
3289 reg_no_sp_operand (op
, mode
)
3291 enum machine_mode mode
;
3294 if (GET_CODE (t
) == SUBREG
)
3296 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3299 return register_operand (op
, mode
);
3303 mmx_reg_operand (op
, mode
)
3305 enum machine_mode mode ATTRIBUTE_UNUSED
;
3307 return MMX_REG_P (op
);
3310 /* Return false if this is any eliminable register. Otherwise
3314 general_no_elim_operand (op
, mode
)
3316 enum machine_mode mode
;
3319 if (GET_CODE (t
) == SUBREG
)
3321 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3322 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3323 || t
== virtual_stack_dynamic_rtx
)
3326 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3327 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3330 return general_operand (op
, mode
);
3333 /* Return false if this is any eliminable register. Otherwise
3334 register_operand or const_int. */
3337 nonmemory_no_elim_operand (op
, mode
)
3339 enum machine_mode mode
;
3342 if (GET_CODE (t
) == SUBREG
)
3344 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3345 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3346 || t
== virtual_stack_dynamic_rtx
)
3349 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3352 /* Return false if this is any eliminable register or stack register,
3353 otherwise work like register_operand. */
3356 index_register_operand (op
, mode
)
3358 enum machine_mode mode
;
3361 if (GET_CODE (t
) == SUBREG
)
3365 if (t
== arg_pointer_rtx
3366 || t
== frame_pointer_rtx
3367 || t
== virtual_incoming_args_rtx
3368 || t
== virtual_stack_vars_rtx
3369 || t
== virtual_stack_dynamic_rtx
3370 || REGNO (t
) == STACK_POINTER_REGNUM
)
3373 return general_operand (op
, mode
);
3376 /* Return true if op is a Q_REGS class register. */
3379 q_regs_operand (op
, mode
)
3381 enum machine_mode mode
;
3383 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3385 if (GET_CODE (op
) == SUBREG
)
3386 op
= SUBREG_REG (op
);
3387 return ANY_QI_REG_P (op
);
3390 /* Return true if op is a NON_Q_REGS class register. */
3393 non_q_regs_operand (op
, mode
)
3395 enum machine_mode mode
;
3397 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3399 if (GET_CODE (op
) == SUBREG
)
3400 op
= SUBREG_REG (op
);
3401 return NON_QI_REG_P (op
);
3404 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3407 sse_comparison_operator (op
, mode
)
3409 enum machine_mode mode ATTRIBUTE_UNUSED
;
3411 enum rtx_code code
= GET_CODE (op
);
3414 /* Operations supported directly. */
3424 /* These are equivalent to ones above in non-IEEE comparisons. */
3431 return !TARGET_IEEE_FP
;
3436 /* Return 1 if OP is a valid comparison operator in valid mode. */
3438 ix86_comparison_operator (op
, mode
)
3440 enum machine_mode mode
;
3442 enum machine_mode inmode
;
3443 enum rtx_code code
= GET_CODE (op
);
3444 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3446 if (GET_RTX_CLASS (code
) != '<')
3448 inmode
= GET_MODE (XEXP (op
, 0));
3450 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3452 enum rtx_code second_code
, bypass_code
;
3453 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3454 return (bypass_code
== NIL
&& second_code
== NIL
);
3461 if (inmode
== CCmode
|| inmode
== CCGCmode
3462 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3465 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3466 if (inmode
== CCmode
)
3470 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3478 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3481 fcmov_comparison_operator (op
, mode
)
3483 enum machine_mode mode
;
3485 enum machine_mode inmode
;
3486 enum rtx_code code
= GET_CODE (op
);
3487 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3489 if (GET_RTX_CLASS (code
) != '<')
3491 inmode
= GET_MODE (XEXP (op
, 0));
3492 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3494 enum rtx_code second_code
, bypass_code
;
3495 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3496 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3498 code
= ix86_fp_compare_code_to_integer (code
);
3500 /* i387 supports just limited amount of conditional codes. */
3503 case LTU
: case GTU
: case LEU
: case GEU
:
3504 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3507 case ORDERED
: case UNORDERED
:
3515 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3518 promotable_binary_operator (op
, mode
)
3520 enum machine_mode mode ATTRIBUTE_UNUSED
;
3522 switch (GET_CODE (op
))
3525 /* Modern CPUs have same latency for HImode and SImode multiply,
3526 but 386 and 486 do HImode multiply faster. */
3527 return ix86_cpu
> PROCESSOR_I486
;
3539 /* Nearly general operand, but accept any const_double, since we wish
3540 to be able to drop them into memory rather than have them get pulled
3544 cmp_fp_expander_operand (op
, mode
)
3546 enum machine_mode mode
;
3548 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3550 if (GET_CODE (op
) == CONST_DOUBLE
)
3552 return general_operand (op
, mode
);
3555 /* Match an SI or HImode register for a zero_extract. */
3558 ext_register_operand (op
, mode
)
3560 enum machine_mode mode ATTRIBUTE_UNUSED
;
3563 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3564 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3567 if (!register_operand (op
, VOIDmode
))
3570 /* Be curefull to accept only registers having upper parts. */
3571 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3572 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3575 /* Return 1 if this is a valid binary floating-point operation.
3576 OP is the expression matched, and MODE is its mode. */
3579 binary_fp_operator (op
, mode
)
3581 enum machine_mode mode
;
3583 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3586 switch (GET_CODE (op
))
3592 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3600 mult_operator (op
, mode
)
3602 enum machine_mode mode ATTRIBUTE_UNUSED
;
3604 return GET_CODE (op
) == MULT
;
3608 div_operator (op
, mode
)
3610 enum machine_mode mode ATTRIBUTE_UNUSED
;
3612 return GET_CODE (op
) == DIV
;
3616 arith_or_logical_operator (op
, mode
)
3618 enum machine_mode mode
;
3620 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3621 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3622 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3625 /* Returns 1 if OP is memory operand with a displacement. */
3628 memory_displacement_operand (op
, mode
)
3630 enum machine_mode mode
;
3632 struct ix86_address parts
;
3634 if (! memory_operand (op
, mode
))
3637 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3640 return parts
.disp
!= NULL_RTX
;
3643 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3644 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3646 ??? It seems likely that this will only work because cmpsi is an
3647 expander, and no actual insns use this. */
3650 cmpsi_operand (op
, mode
)
3652 enum machine_mode mode
;
3654 if (nonimmediate_operand (op
, mode
))
3657 if (GET_CODE (op
) == AND
3658 && GET_MODE (op
) == SImode
3659 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3660 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3661 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3662 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3663 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3664 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3670 /* Returns 1 if OP is memory operand that can not be represented by the
3674 long_memory_operand (op
, mode
)
3676 enum machine_mode mode
;
3678 if (! memory_operand (op
, mode
))
3681 return memory_address_length (op
) != 0;
3684 /* Return nonzero if the rtx is known aligned. */
3687 aligned_operand (op
, mode
)
3689 enum machine_mode mode
;
3691 struct ix86_address parts
;
3693 if (!general_operand (op
, mode
))
3696 /* Registers and immediate operands are always "aligned". */
3697 if (GET_CODE (op
) != MEM
)
3700 /* Don't even try to do any aligned optimizations with volatiles. */
3701 if (MEM_VOLATILE_P (op
))
3706 /* Pushes and pops are only valid on the stack pointer. */
3707 if (GET_CODE (op
) == PRE_DEC
3708 || GET_CODE (op
) == POST_INC
)
3711 /* Decode the address. */
3712 if (! ix86_decompose_address (op
, &parts
))
3715 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3716 parts
.base
= SUBREG_REG (parts
.base
);
3717 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3718 parts
.index
= SUBREG_REG (parts
.index
);
3720 /* Look for some component that isn't known to be aligned. */
3724 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3729 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3734 if (GET_CODE (parts
.disp
) != CONST_INT
3735 || (INTVAL (parts
.disp
) & 3) != 0)
3739 /* Didn't find one -- this must be an aligned address. */
3743 /* Return true if the constant is something that can be loaded with
3744 a special instruction. Only handle 0.0 and 1.0; others are less
3748 standard_80387_constant_p (x
)
3751 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3753 /* Note that on the 80387, other constants, such as pi, that we should support
3754 too. On some machines, these are much slower to load as standard constant,
3755 than to load from doubles in memory. */
3756 if (x
== CONST0_RTX (GET_MODE (x
)))
3758 if (x
== CONST1_RTX (GET_MODE (x
)))
3763 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3766 standard_sse_constant_p (x
)
3769 if (x
== const0_rtx
)
3771 return (x
== CONST0_RTX (GET_MODE (x
)));
3774 /* Returns 1 if OP contains a symbol reference */
3777 symbolic_reference_mentioned_p (op
)
3780 register const char *fmt
;
3783 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3786 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3787 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3793 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3794 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3798 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3805 /* Return 1 if it is appropriate to emit `ret' instructions in the
3806 body of a function. Do this only if the epilogue is simple, needing a
3807 couple of insns. Prior to reloading, we can't tell how many registers
3808 must be saved, so return 0 then. Return 0 if there is no frame
3809 marker to de-allocate.
3811 If NON_SAVING_SETJMP is defined and true, then it is not possible
3812 for the epilogue to be simple, so return 0. This is a special case
3813 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3814 until final, but jump_optimize may need to know sooner if a
3818 ix86_can_use_return_insn_p ()
3820 struct ix86_frame frame
;
3822 #ifdef NON_SAVING_SETJMP
3823 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3827 if (! reload_completed
|| frame_pointer_needed
)
3830 /* Don't allow more than 32 pop, since that's all we can do
3831 with one instruction. */
3832 if (current_function_pops_args
3833 && current_function_args_size
>= 32768)
3836 ix86_compute_frame_layout (&frame
);
3837 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3840 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3842 x86_64_sign_extended_value (value
, allow_rip
)
3846 switch (GET_CODE (value
))
3848 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3849 to be at least 32 and this all acceptable constants are
3850 represented as CONST_INT. */
3852 if (HOST_BITS_PER_WIDE_INT
== 32)
3856 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3857 return trunc_int_for_mode (val
, SImode
) == val
;
3861 /* For certain code models, the symbolic references are known to fit.
3862 in CM_SMALL_PIC model we know it fits if it is local to the shared
3863 library. Don't count TLS SYMBOL_REFs here, since they should fit
3864 only if inside of UNSPEC handled below. */
3866 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
3868 && ix86_cmodel
== CM_SMALL_PIC
3869 && (CONSTANT_POOL_ADDRESS_P (value
)
3870 || SYMBOL_REF_FLAG (value
))
3871 && ! tls_symbolic_operand (value
, GET_MODE (value
))));
3873 /* For certain code models, the code is near as well. */
3875 return ix86_cmodel
!= CM_LARGE
3876 && (allow_rip
|| ix86_cmodel
!= CM_SMALL_PIC
);
3878 /* We also may accept the offsetted memory references in certain special
3881 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
3882 switch (XINT (XEXP (value
, 0), 1))
3884 case UNSPEC_GOTPCREL
:
3886 case UNSPEC_GOTNTPOFF
:
3892 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3894 rtx op1
= XEXP (XEXP (value
, 0), 0);
3895 rtx op2
= XEXP (XEXP (value
, 0), 1);
3896 HOST_WIDE_INT offset
;
3898 if (ix86_cmodel
== CM_LARGE
)
3900 if (GET_CODE (op2
) != CONST_INT
)
3902 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3903 switch (GET_CODE (op1
))
3906 /* For CM_SMALL assume that latest object is 16MB before
3907 end of 31bits boundary. We may also accept pretty
3908 large negative constants knowing that all objects are
3909 in the positive half of address space. */
3910 if (ix86_cmodel
== CM_SMALL
3911 && offset
< 16*1024*1024
3912 && trunc_int_for_mode (offset
, SImode
) == offset
)
3914 /* For CM_KERNEL we know that all object resist in the
3915 negative half of 32bits address space. We may not
3916 accept negative offsets, since they may be just off
3917 and we may accept pretty large positive ones. */
3918 if (ix86_cmodel
== CM_KERNEL
3920 && trunc_int_for_mode (offset
, SImode
) == offset
)
3922 /* For CM_SMALL_PIC, we can make similar assumptions
3923 as for CM_SMALL model, if we know the symbol is local
3924 to the shared library. Disallow any TLS symbols,
3925 since they should always be enclosed in an UNSPEC. */
3926 if (ix86_cmodel
== CM_SMALL_PIC
3928 && (CONSTANT_POOL_ADDRESS_P (op1
)
3929 || SYMBOL_REF_FLAG (op1
))
3930 && ! tls_symbolic_operand (op1
, GET_MODE (op1
))
3931 && offset
< 16*1024*1024
3932 && offset
>= -16*1024*1024
3933 && trunc_int_for_mode (offset
, SImode
) == offset
)
3937 /* These conditions are similar to SYMBOL_REF ones, just the
3938 constraints for code models differ. */
3939 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
3940 || (ix86_cmodel
== CM_SMALL_PIC
&& allow_rip
3941 && offset
>= -16*1024*1024))
3942 && offset
< 16*1024*1024
3943 && trunc_int_for_mode (offset
, SImode
) == offset
)
3945 if (ix86_cmodel
== CM_KERNEL
3947 && trunc_int_for_mode (offset
, SImode
) == offset
)
3951 switch (XINT (op1
, 1))
3956 && trunc_int_for_mode (offset
, SImode
) == offset
)
3970 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3972 x86_64_zero_extended_value (value
)
3975 switch (GET_CODE (value
))
3978 if (HOST_BITS_PER_WIDE_INT
== 32)
3979 return (GET_MODE (value
) == VOIDmode
3980 && !CONST_DOUBLE_HIGH (value
));
3984 if (HOST_BITS_PER_WIDE_INT
== 32)
3985 return INTVAL (value
) >= 0;
3987 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3990 /* For certain code models, the symbolic references are known to fit. */
3992 return ix86_cmodel
== CM_SMALL
;
3994 /* For certain code models, the code is near as well. */
3996 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3998 /* We also may accept the offsetted memory references in certain special
4001 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4003 rtx op1
= XEXP (XEXP (value
, 0), 0);
4004 rtx op2
= XEXP (XEXP (value
, 0), 1);
4006 if (ix86_cmodel
== CM_LARGE
)
4008 switch (GET_CODE (op1
))
4012 /* For small code model we may accept pretty large positive
4013 offsets, since one bit is available for free. Negative
4014 offsets are limited by the size of NULL pointer area
4015 specified by the ABI. */
4016 if (ix86_cmodel
== CM_SMALL
4017 && GET_CODE (op2
) == CONST_INT
4018 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4019 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4022 /* ??? For the kernel, we may accept adjustment of
4023 -0x10000000, since we know that it will just convert
4024 negative address space to positive, but perhaps this
4025 is not worthwhile. */
4028 /* These conditions are similar to SYMBOL_REF ones, just the
4029 constraints for code models differ. */
4030 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4031 && GET_CODE (op2
) == CONST_INT
4032 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4033 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4047 /* Value should be nonzero if functions must have frame pointers.
4048 Zero means the frame pointer need not be set up (and parms may
4049 be accessed via the stack pointer) in functions that seem suitable. */
4052 ix86_frame_pointer_required ()
4054 /* If we accessed previous frames, then the generated code expects
4055 to be able to access the saved ebp value in our frame. */
4056 if (cfun
->machine
->accesses_prev_frame
)
4059 /* Several x86 os'es need a frame pointer for other reasons,
4060 usually pertaining to setjmp. */
4061 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4064 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4065 the frame pointer by default. Turn it back on now if we've not
4066 got a leaf function. */
4067 if (TARGET_OMIT_LEAF_FRAME_POINTER
4068 && (!current_function_is_leaf
))
4071 if (current_function_profile
)
4077 /* Record that the current function accesses previous call frames. */
4080 ix86_setup_frame_addresses ()
4082 cfun
->machine
->accesses_prev_frame
= 1;
4085 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4086 # define USE_HIDDEN_LINKONCE 1
4088 # define USE_HIDDEN_LINKONCE 0
4091 static int pic_labels_used
;
4093 /* Fills in the label name that should be used for a pc thunk for
4094 the given register. */
4097 get_pc_thunk_name (name
, regno
)
4101 if (USE_HIDDEN_LINKONCE
)
4102 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4104 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4108 /* This function generates code for -fpic that loads %ebx with
4109 the return address of the caller and then returns. */
4112 ix86_asm_file_end (file
)
4118 for (regno
= 0; regno
< 8; ++regno
)
4122 if (! ((pic_labels_used
>> regno
) & 1))
4125 get_pc_thunk_name (name
, regno
);
4127 if (USE_HIDDEN_LINKONCE
)
4131 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4133 TREE_PUBLIC (decl
) = 1;
4134 TREE_STATIC (decl
) = 1;
4135 DECL_ONE_ONLY (decl
) = 1;
4137 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4138 named_section (decl
, NULL
, 0);
4140 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4141 fputs ("\t.hidden\t", file
);
4142 assemble_name (file
, name
);
4144 ASM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
4149 ASM_OUTPUT_LABEL (file
, name
);
4152 xops
[0] = gen_rtx_REG (SImode
, regno
);
4153 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4154 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4155 output_asm_insn ("ret", xops
);
4159 /* Emit code for the SET_GOT patterns. */
4162 output_set_got (dest
)
4168 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4170 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4172 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4175 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4177 output_asm_insn ("call\t%a2", xops
);
4180 /* Output the "canonical" label name ("Lxx$pb") here too. This
4181 is what will be referred to by the Mach-O PIC subsystem. */
4182 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4184 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, "L",
4185 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4188 output_asm_insn ("pop{l}\t%0", xops
);
4193 get_pc_thunk_name (name
, REGNO (dest
));
4194 pic_labels_used
|= 1 << REGNO (dest
);
4196 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4197 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4198 output_asm_insn ("call\t%X2", xops
);
4201 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4202 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4203 else if (!TARGET_MACHO
)
4204 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4209 /* Generate an "push" pattern for input ARG. */
4215 return gen_rtx_SET (VOIDmode
,
4217 gen_rtx_PRE_DEC (Pmode
,
4218 stack_pointer_rtx
)),
4222 /* Return >= 0 if there is an unused call-clobbered register available
4223 for the entire function. */
4226 ix86_select_alt_pic_regnum ()
4228 if (current_function_is_leaf
&& !current_function_profile
)
4231 for (i
= 2; i
>= 0; --i
)
4232 if (!regs_ever_live
[i
])
4236 return INVALID_REGNUM
;
4239 /* Return 1 if we need to save REGNO. */
4241 ix86_save_reg (regno
, maybe_eh_return
)
4243 int maybe_eh_return
;
4245 if (pic_offset_table_rtx
4246 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4247 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4248 || current_function_profile
4249 || current_function_calls_eh_return
))
4251 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4256 if (current_function_calls_eh_return
&& maybe_eh_return
)
4261 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4262 if (test
== INVALID_REGNUM
)
4269 return (regs_ever_live
[regno
]
4270 && !call_used_regs
[regno
]
4271 && !fixed_regs
[regno
]
4272 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4275 /* Return number of registers to be saved on the stack. */
4283 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4284 if (ix86_save_reg (regno
, true))
4289 /* Return the offset between two registers, one to be eliminated, and the other
4290 its replacement, at the start of a routine. */
4293 ix86_initial_elimination_offset (from
, to
)
4297 struct ix86_frame frame
;
4298 ix86_compute_frame_layout (&frame
);
4300 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4301 return frame
.hard_frame_pointer_offset
;
4302 else if (from
== FRAME_POINTER_REGNUM
4303 && to
== HARD_FRAME_POINTER_REGNUM
)
4304 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4307 if (to
!= STACK_POINTER_REGNUM
)
4309 else if (from
== ARG_POINTER_REGNUM
)
4310 return frame
.stack_pointer_offset
;
4311 else if (from
!= FRAME_POINTER_REGNUM
)
4314 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4318 /* Fill structure ix86_frame about frame of currently computed function. */
4321 ix86_compute_frame_layout (frame
)
4322 struct ix86_frame
*frame
;
4324 HOST_WIDE_INT total_size
;
4325 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4327 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4328 HOST_WIDE_INT size
= get_frame_size ();
4330 frame
->nregs
= ix86_nsaved_regs ();
4333 /* Skip return address and saved base pointer. */
4334 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4336 frame
->hard_frame_pointer_offset
= offset
;
4338 /* Do some sanity checking of stack_alignment_needed and
4339 preferred_alignment, since i386 port is the only using those features
4340 that may break easily. */
4342 if (size
&& !stack_alignment_needed
)
4344 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4346 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4348 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4351 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4352 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4354 /* Register save area */
4355 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4358 if (ix86_save_varrargs_registers
)
4360 offset
+= X86_64_VARARGS_SIZE
;
4361 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4364 frame
->va_arg_size
= 0;
4366 /* Align start of frame for local function. */
4367 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4368 & -stack_alignment_needed
) - offset
;
4370 offset
+= frame
->padding1
;
4372 /* Frame pointer points here. */
4373 frame
->frame_pointer_offset
= offset
;
4377 /* Add outgoing arguments area. Can be skipped if we eliminated
4378 all the function calls as dead code. */
4379 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4381 offset
+= current_function_outgoing_args_size
;
4382 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4385 frame
->outgoing_arguments_size
= 0;
4387 /* Align stack boundary. Only needed if we're calling another function
4389 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4390 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4391 & -preferred_alignment
) - offset
;
4393 frame
->padding2
= 0;
4395 offset
+= frame
->padding2
;
4397 /* We've reached end of stack frame. */
4398 frame
->stack_pointer_offset
= offset
;
4400 /* Size prologue needs to allocate. */
4401 frame
->to_allocate
=
4402 (size
+ frame
->padding1
+ frame
->padding2
4403 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4405 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4406 && current_function_is_leaf
)
4408 frame
->red_zone_size
= frame
->to_allocate
;
4409 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4410 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4413 frame
->red_zone_size
= 0;
4414 frame
->to_allocate
-= frame
->red_zone_size
;
4415 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4417 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4418 fprintf (stderr
, "size: %i\n", size
);
4419 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4420 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4421 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4422 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4423 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4424 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4425 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4426 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4427 frame
->hard_frame_pointer_offset
);
4428 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4432 /* Emit code to save registers in the prologue. */
4435 ix86_emit_save_regs ()
4440 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4441 if (ix86_save_reg (regno
, true))
4443 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4444 RTX_FRAME_RELATED_P (insn
) = 1;
4448 /* Emit code to save registers using MOV insns. First register
4449 is restored from POINTER + OFFSET. */
4451 ix86_emit_save_regs_using_mov (pointer
, offset
)
4453 HOST_WIDE_INT offset
;
4458 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4459 if (ix86_save_reg (regno
, true))
4461 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4463 gen_rtx_REG (Pmode
, regno
));
4464 RTX_FRAME_RELATED_P (insn
) = 1;
4465 offset
+= UNITS_PER_WORD
;
4469 /* Expand the prologue into a bunch of separate insns. */
4472 ix86_expand_prologue ()
4476 struct ix86_frame frame
;
4478 HOST_WIDE_INT allocate
;
4482 use_fast_prologue_epilogue
4483 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4484 if (TARGET_PROLOGUE_USING_MOVE
)
4485 use_mov
= use_fast_prologue_epilogue
;
4487 ix86_compute_frame_layout (&frame
);
4489 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4490 slower on all targets. Also sdb doesn't like it. */
4492 if (frame_pointer_needed
)
4494 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4495 RTX_FRAME_RELATED_P (insn
) = 1;
4497 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4498 RTX_FRAME_RELATED_P (insn
) = 1;
4501 allocate
= frame
.to_allocate
;
4502 /* In case we are dealing only with single register and empty frame,
4503 push is equivalent of the mov+add sequence. */
4504 if (allocate
== 0 && frame
.nregs
<= 1)
4508 ix86_emit_save_regs ();
4510 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4514 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4516 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4517 (stack_pointer_rtx
, stack_pointer_rtx
,
4518 GEN_INT (-allocate
)));
4519 RTX_FRAME_RELATED_P (insn
) = 1;
4523 /* ??? Is this only valid for Win32? */
4530 arg0
= gen_rtx_REG (SImode
, 0);
4531 emit_move_insn (arg0
, GEN_INT (allocate
));
4533 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4534 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4535 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4537 CALL_INSN_FUNCTION_USAGE (insn
)
4538 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4539 CALL_INSN_FUNCTION_USAGE (insn
));
4543 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4544 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4546 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4547 -frame
.nregs
* UNITS_PER_WORD
);
4550 #ifdef SUBTARGET_PROLOGUE
4554 pic_reg_used
= false;
4555 if (pic_offset_table_rtx
4556 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4557 || current_function_profile
))
4559 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4561 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4562 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4564 pic_reg_used
= true;
4569 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4571 /* Even with accurate pre-reload life analysis, we can wind up
4572 deleting all references to the pic register after reload.
4573 Consider if cross-jumping unifies two sides of a branch
4574 controled by a comparison vs the only read from a global.
4575 In which case, allow the set_got to be deleted, though we're
4576 too late to do anything about the ebx save in the prologue. */
4577 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4580 /* Prevent function calls from be scheduled before the call to mcount.
4581 In the pic_reg_used case, make sure that the got load isn't deleted. */
4582 if (current_function_profile
)
4583 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4586 /* Emit code to restore saved registers using MOV insns. First register
4587 is restored from POINTER + OFFSET. */
4589 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4592 int maybe_eh_return
;
4596 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4597 if (ix86_save_reg (regno
, maybe_eh_return
))
4599 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4600 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4602 offset
+= UNITS_PER_WORD
;
4606 /* Restore function stack, frame, and registers. */
4609 ix86_expand_epilogue (style
)
4613 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4614 struct ix86_frame frame
;
4615 HOST_WIDE_INT offset
;
4617 ix86_compute_frame_layout (&frame
);
4619 /* Calculate start of saved registers relative to ebp. Special care
4620 must be taken for the normal return case of a function using
4621 eh_return: the eax and edx registers are marked as saved, but not
4622 restored along this path. */
4623 offset
= frame
.nregs
;
4624 if (current_function_calls_eh_return
&& style
!= 2)
4626 offset
*= -UNITS_PER_WORD
;
4628 /* If we're only restoring one register and sp is not valid then
4629 using a move instruction to restore the register since it's
4630 less work than reloading sp and popping the register.
4632 The default code result in stack adjustment using add/lea instruction,
4633 while this code results in LEAVE instruction (or discrete equivalent),
4634 so it is profitable in some other cases as well. Especially when there
4635 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4636 and there is exactly one register to pop. This heruistic may need some
4637 tuning in future. */
4638 if ((!sp_valid
&& frame
.nregs
<= 1)
4639 || (TARGET_EPILOGUE_USING_MOVE
4640 && use_fast_prologue_epilogue
4641 && (frame
.nregs
> 1 || frame
.to_allocate
))
4642 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4643 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4644 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4645 || current_function_calls_eh_return
)
4647 /* Restore registers. We can use ebp or esp to address the memory
4648 locations. If both are available, default to ebp, since offsets
4649 are known to be small. Only exception is esp pointing directly to the
4650 end of block of saved registers, where we may simplify addressing
4653 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4654 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4655 frame
.to_allocate
, style
== 2);
4657 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4658 offset
, style
== 2);
4660 /* eh_return epilogues need %ecx added to the stack pointer. */
4663 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4665 if (frame_pointer_needed
)
4667 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4668 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4669 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4671 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4672 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4674 emit_insn (gen_pro_epilogue_adjust_stack
4675 (stack_pointer_rtx
, sa
, const0_rtx
));
4679 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4680 tmp
= plus_constant (tmp
, (frame
.to_allocate
4681 + frame
.nregs
* UNITS_PER_WORD
));
4682 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4685 else if (!frame_pointer_needed
)
4686 emit_insn (gen_pro_epilogue_adjust_stack
4687 (stack_pointer_rtx
, stack_pointer_rtx
,
4688 GEN_INT (frame
.to_allocate
4689 + frame
.nregs
* UNITS_PER_WORD
)));
4690 /* If not an i386, mov & pop is faster than "leave". */
4691 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4692 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4695 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4696 hard_frame_pointer_rtx
,
4699 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4701 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4706 /* First step is to deallocate the stack frame so that we can
4707 pop the registers. */
4710 if (!frame_pointer_needed
)
4712 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4713 hard_frame_pointer_rtx
,
4716 else if (frame
.to_allocate
)
4717 emit_insn (gen_pro_epilogue_adjust_stack
4718 (stack_pointer_rtx
, stack_pointer_rtx
,
4719 GEN_INT (frame
.to_allocate
)));
4721 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4722 if (ix86_save_reg (regno
, false))
4725 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4727 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4729 if (frame_pointer_needed
)
4731 /* Leave results in shorter dependency chains on CPUs that are
4732 able to grok it fast. */
4733 if (TARGET_USE_LEAVE
)
4734 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4735 else if (TARGET_64BIT
)
4736 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4738 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4742 /* Sibcall epilogues don't want a return instruction. */
4746 if (current_function_pops_args
&& current_function_args_size
)
4748 rtx popc
= GEN_INT (current_function_pops_args
);
4750 /* i386 can only pop 64K bytes. If asked to pop more, pop
4751 return address, do explicit add, and jump indirectly to the
4754 if (current_function_pops_args
>= 65536)
4756 rtx ecx
= gen_rtx_REG (SImode
, 2);
4758 /* There are is no "pascal" calling convention in 64bit ABI. */
4762 emit_insn (gen_popsi1 (ecx
));
4763 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4764 emit_jump_insn (gen_return_indirect_internal (ecx
));
4767 emit_jump_insn (gen_return_pop_internal (popc
));
4770 emit_jump_insn (gen_return_internal ());
4773 /* Reset from the function's potential modifications. */
4776 ix86_output_function_epilogue (file
, size
)
4777 FILE *file ATTRIBUTE_UNUSED
;
4778 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
4780 if (pic_offset_table_rtx
)
4781 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4784 /* Extract the parts of an RTL expression that is a valid memory address
4785 for an instruction. Return 0 if the structure of the address is
4786 grossly off. Return -1 if the address contains ASHIFT, so it is not
4787 strictly valid, but still used for computing length of lea instruction.
4791 ix86_decompose_address (addr
, out
)
4793 struct ix86_address
*out
;
4795 rtx base
= NULL_RTX
;
4796 rtx index
= NULL_RTX
;
4797 rtx disp
= NULL_RTX
;
4798 HOST_WIDE_INT scale
= 1;
4799 rtx scale_rtx
= NULL_RTX
;
4802 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
4804 else if (GET_CODE (addr
) == PLUS
)
4806 rtx op0
= XEXP (addr
, 0);
4807 rtx op1
= XEXP (addr
, 1);
4808 enum rtx_code code0
= GET_CODE (op0
);
4809 enum rtx_code code1
= GET_CODE (op1
);
4811 if (code0
== REG
|| code0
== SUBREG
)
4813 if (code1
== REG
|| code1
== SUBREG
)
4814 index
= op0
, base
= op1
; /* index + base */
4816 base
= op0
, disp
= op1
; /* base + displacement */
4818 else if (code0
== MULT
)
4820 index
= XEXP (op0
, 0);
4821 scale_rtx
= XEXP (op0
, 1);
4822 if (code1
== REG
|| code1
== SUBREG
)
4823 base
= op1
; /* index*scale + base */
4825 disp
= op1
; /* index*scale + disp */
4827 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4829 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4830 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4831 base
= XEXP (op0
, 1);
4834 else if (code0
== PLUS
)
4836 index
= XEXP (op0
, 0); /* index + base + disp */
4837 base
= XEXP (op0
, 1);
4843 else if (GET_CODE (addr
) == MULT
)
4845 index
= XEXP (addr
, 0); /* index*scale */
4846 scale_rtx
= XEXP (addr
, 1);
4848 else if (GET_CODE (addr
) == ASHIFT
)
4852 /* We're called for lea too, which implements ashift on occasion. */
4853 index
= XEXP (addr
, 0);
4854 tmp
= XEXP (addr
, 1);
4855 if (GET_CODE (tmp
) != CONST_INT
)
4857 scale
= INTVAL (tmp
);
4858 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4864 disp
= addr
; /* displacement */
4866 /* Extract the integral value of scale. */
4869 if (GET_CODE (scale_rtx
) != CONST_INT
)
4871 scale
= INTVAL (scale_rtx
);
4874 /* Allow arg pointer and stack pointer as index if there is not scaling */
4875 if (base
&& index
&& scale
== 1
4876 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4877 || index
== stack_pointer_rtx
))
4884 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4885 if ((base
== hard_frame_pointer_rtx
4886 || base
== frame_pointer_rtx
4887 || base
== arg_pointer_rtx
) && !disp
)
4890 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4891 Avoid this by transforming to [%esi+0]. */
4892 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4893 && base
&& !index
&& !disp
4895 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4898 /* Special case: encode reg+reg instead of reg*2. */
4899 if (!base
&& index
&& scale
&& scale
== 2)
4900 base
= index
, scale
= 1;
4902 /* Special case: scaling cannot be encoded without base or displacement. */
4903 if (!base
&& !disp
&& index
&& scale
!= 1)
4914 /* Return cost of the memory address x.
4915 For i386, it is better to use a complex address than let gcc copy
4916 the address into a reg and make a new pseudo. But not if the address
4917 requires to two regs - that would mean more pseudos with longer
4920 ix86_address_cost (x
)
4923 struct ix86_address parts
;
4926 if (!ix86_decompose_address (x
, &parts
))
4929 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
4930 parts
.base
= SUBREG_REG (parts
.base
);
4931 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
4932 parts
.index
= SUBREG_REG (parts
.index
);
4934 /* More complex memory references are better. */
4935 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4938 /* Attempt to minimize number of registers in the address. */
4940 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4942 && (!REG_P (parts
.index
)
4943 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4947 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4949 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4950 && parts
.base
!= parts
.index
)
4953 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4954 since it's predecode logic can't detect the length of instructions
4955 and it degenerates to vector decoded. Increase cost of such
4956 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4957 to split such addresses or even refuse such addresses at all.
4959 Following addressing modes are affected:
4964 The first and last case may be avoidable by explicitly coding the zero in
4965 memory address, but I don't have AMD-K6 machine handy to check this
4969 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4970 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4971 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4977 /* If X is a machine specific address (i.e. a symbol or label being
4978 referenced as a displacement from the GOT implemented using an
4979 UNSPEC), then return the base term. Otherwise return X. */
4982 ix86_find_base_term (x
)
4989 if (GET_CODE (x
) != CONST
)
4992 if (GET_CODE (term
) == PLUS
4993 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4994 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4995 term
= XEXP (term
, 0);
4996 if (GET_CODE (term
) != UNSPEC
4997 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5000 term
= XVECEXP (term
, 0, 0);
5002 if (GET_CODE (term
) != SYMBOL_REF
5003 && GET_CODE (term
) != LABEL_REF
)
5009 if (GET_CODE (x
) != PLUS
5010 || XEXP (x
, 0) != pic_offset_table_rtx
5011 || GET_CODE (XEXP (x
, 1)) != CONST
)
5014 term
= XEXP (XEXP (x
, 1), 0);
5016 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
5017 term
= XEXP (term
, 0);
5019 if (GET_CODE (term
) != UNSPEC
5020 || XINT (term
, 1) != UNSPEC_GOTOFF
)
5023 term
= XVECEXP (term
, 0, 0);
5025 if (GET_CODE (term
) != SYMBOL_REF
5026 && GET_CODE (term
) != LABEL_REF
)
5032 /* Determine if a given RTX is a valid constant. We already know this
5033 satisfies CONSTANT_P. */
5036 legitimate_constant_p (x
)
5041 switch (GET_CODE (x
))
5044 /* TLS symbols are not constant. */
5045 if (tls_symbolic_operand (x
, Pmode
))
5050 inner
= XEXP (x
, 0);
5052 /* Offsets of TLS symbols are never valid.
5053 Discourage CSE from creating them. */
5054 if (GET_CODE (inner
) == PLUS
5055 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5058 /* Only some unspecs are valid as "constants". */
5059 if (GET_CODE (inner
) == UNSPEC
)
5060 switch (XINT (inner
, 1))
5063 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5073 /* Otherwise we handle everything else in the move patterns. */
5077 /* Determine if a given RTX is a valid constant address. */
5080 constant_address_p (x
)
5083 switch (GET_CODE (x
))
5090 return TARGET_64BIT
;
5093 /* For Mach-O, really believe the CONST. */
5096 /* Otherwise fall through. */
5098 return !flag_pic
&& legitimate_constant_p (x
);
5105 /* Nonzero if the constant value X is a legitimate general operand
5106 when generating PIC code. It is given that flag_pic is on and
5107 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5110 legitimate_pic_operand_p (x
)
5115 switch (GET_CODE (x
))
5118 inner
= XEXP (x
, 0);
5120 /* Only some unspecs are valid as "constants". */
5121 if (GET_CODE (inner
) == UNSPEC
)
5122 switch (XINT (inner
, 1))
5125 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5133 return legitimate_pic_address_disp_p (x
);
5140 /* Determine if a given CONST RTX is a valid memory displacement
5144 legitimate_pic_address_disp_p (disp
)
5149 /* In 64bit mode we can allow direct addresses of symbols and labels
5150 when they are not dynamic symbols. */
5151 if (TARGET_64BIT
&& local_symbolic_operand (disp
, Pmode
))
5153 if (GET_CODE (disp
) != CONST
)
5155 disp
= XEXP (disp
, 0);
5159 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5160 of GOT tables. We should not need these anyway. */
5161 if (GET_CODE (disp
) != UNSPEC
5162 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5165 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5166 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5172 if (GET_CODE (disp
) == PLUS
)
5174 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5176 disp
= XEXP (disp
, 0);
5180 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5181 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
5183 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5184 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5185 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5187 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5188 if (strstr (sym_name
, "$pb") != 0)
5193 if (GET_CODE (disp
) != UNSPEC
)
5196 switch (XINT (disp
, 1))
5201 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5203 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5204 case UNSPEC_GOTTPOFF
:
5205 case UNSPEC_GOTNTPOFF
:
5206 case UNSPEC_INDNTPOFF
:
5209 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5211 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5213 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5219 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5220 memory address for an instruction. The MODE argument is the machine mode
5221 for the MEM expression that wants to use this address.
5223 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5224 convert common non-canonical forms to canonical form so that they will
5228 legitimate_address_p (mode
, addr
, strict
)
5229 enum machine_mode mode
;
5233 struct ix86_address parts
;
5234 rtx base
, index
, disp
;
5235 HOST_WIDE_INT scale
;
5236 const char *reason
= NULL
;
5237 rtx reason_rtx
= NULL_RTX
;
5239 if (TARGET_DEBUG_ADDR
)
5242 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5243 GET_MODE_NAME (mode
), strict
);
5247 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
5249 if (TARGET_DEBUG_ADDR
)
5250 fprintf (stderr
, "Success.\n");
5254 if (ix86_decompose_address (addr
, &parts
) <= 0)
5256 reason
= "decomposition failed";
5261 index
= parts
.index
;
5263 scale
= parts
.scale
;
5265 /* Validate base register.
5267 Don't allow SUBREG's here, it can lead to spill failures when the base
5268 is one word out of a two word structure, which is represented internally
5276 if (GET_CODE (base
) == SUBREG
)
5277 reg
= SUBREG_REG (base
);
5281 if (GET_CODE (reg
) != REG
)
5283 reason
= "base is not a register";
5287 if (GET_MODE (base
) != Pmode
)
5289 reason
= "base is not in Pmode";
5293 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
5294 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5296 reason
= "base is not valid";
5301 /* Validate index register.
5303 Don't allow SUBREG's here, it can lead to spill failures when the index
5304 is one word out of a two word structure, which is represented internally
5312 if (GET_CODE (index
) == SUBREG
)
5313 reg
= SUBREG_REG (index
);
5317 if (GET_CODE (reg
) != REG
)
5319 reason
= "index is not a register";
5323 if (GET_MODE (index
) != Pmode
)
5325 reason
= "index is not in Pmode";
5329 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5330 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5332 reason
= "index is not valid";
5337 /* Validate scale factor. */
5340 reason_rtx
= GEN_INT (scale
);
5343 reason
= "scale without index";
5347 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5349 reason
= "scale is not a valid multiplier";
5354 /* Validate displacement. */
5361 if (!x86_64_sign_extended_value (disp
, !(index
|| base
)))
5363 reason
= "displacement is out of range";
5369 if (GET_CODE (disp
) == CONST_DOUBLE
)
5371 reason
= "displacement is a const_double";
5376 if (GET_CODE (disp
) == CONST
5377 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5378 switch (XINT (XEXP (disp
, 0), 1))
5382 case UNSPEC_GOTPCREL
:
5385 goto is_legitimate_pic
;
5387 case UNSPEC_GOTTPOFF
:
5388 case UNSPEC_GOTNTPOFF
:
5389 case UNSPEC_INDNTPOFF
:
5395 reason
= "invalid address unspec";
5399 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5401 && !machopic_operand_p (disp
)
5406 if (TARGET_64BIT
&& (index
|| base
))
5408 /* foo@dtpoff(%rX) is ok. */
5409 if (GET_CODE (disp
) != CONST
5410 || GET_CODE (XEXP (disp
, 0)) != PLUS
5411 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5412 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5413 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5414 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5416 reason
= "non-constant pic memory reference";
5420 else if (! legitimate_pic_address_disp_p (disp
))
5422 reason
= "displacement is an invalid pic construct";
5426 /* This code used to verify that a symbolic pic displacement
5427 includes the pic_offset_table_rtx register.
5429 While this is good idea, unfortunately these constructs may
5430 be created by "adds using lea" optimization for incorrect
5439 This code is nonsensical, but results in addressing
5440 GOT table with pic_offset_table_rtx base. We can't
5441 just refuse it easily, since it gets matched by
5442 "addsi3" pattern, that later gets split to lea in the
5443 case output register differs from input. While this
5444 can be handled by separate addsi pattern for this case
5445 that never results in lea, this seems to be easier and
5446 correct fix for crash to disable this test. */
5448 else if (!CONSTANT_ADDRESS_P (disp
))
5450 reason
= "displacement is not constant";
5455 /* Everything looks valid. */
5456 if (TARGET_DEBUG_ADDR
)
5457 fprintf (stderr
, "Success.\n");
5461 if (TARGET_DEBUG_ADDR
)
5463 fprintf (stderr
, "Error: %s\n", reason
);
5464 debug_rtx (reason_rtx
);
5469 /* Return an unique alias set for the GOT. */
5471 static HOST_WIDE_INT
5472 ix86_GOT_alias_set ()
5474 static HOST_WIDE_INT set
= -1;
5476 set
= new_alias_set ();
5480 /* Return a legitimate reference for ORIG (an address) using the
5481 register REG. If REG is 0, a new pseudo is generated.
5483 There are two types of references that must be handled:
5485 1. Global data references must load the address from the GOT, via
5486 the PIC reg. An insn is emitted to do this load, and the reg is
5489 2. Static data references, constant pool addresses, and code labels
5490 compute the address as an offset from the GOT, whose base is in
5491 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5492 differentiate them from global data objects. The returned
5493 address is the PIC reg + an unspec constant.
5495 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5496 reg also appears in the address. */
5499 legitimize_pic_address (orig
, reg
)
5509 reg
= gen_reg_rtx (Pmode
);
5510 /* Use the generic Mach-O PIC machinery. */
5511 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5514 if (local_symbolic_operand (addr
, Pmode
))
5516 /* In 64bit mode we can address such objects directly. */
5521 /* This symbol may be referenced via a displacement from the PIC
5522 base address (@GOTOFF). */
5524 if (reload_in_progress
)
5525 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5526 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5527 new = gen_rtx_CONST (Pmode
, new);
5528 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5532 emit_move_insn (reg
, new);
5537 else if (GET_CODE (addr
) == SYMBOL_REF
)
5541 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5542 new = gen_rtx_CONST (Pmode
, new);
5543 new = gen_rtx_MEM (Pmode
, new);
5544 RTX_UNCHANGING_P (new) = 1;
5545 set_mem_alias_set (new, ix86_GOT_alias_set ());
5548 reg
= gen_reg_rtx (Pmode
);
5549 /* Use directly gen_movsi, otherwise the address is loaded
5550 into register for CSE. We don't want to CSE this addresses,
5551 instead we CSE addresses from the GOT table, so skip this. */
5552 emit_insn (gen_movsi (reg
, new));
5557 /* This symbol must be referenced via a load from the
5558 Global Offset Table (@GOT). */
5560 if (reload_in_progress
)
5561 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5562 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5563 new = gen_rtx_CONST (Pmode
, new);
5564 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5565 new = gen_rtx_MEM (Pmode
, new);
5566 RTX_UNCHANGING_P (new) = 1;
5567 set_mem_alias_set (new, ix86_GOT_alias_set ());
5570 reg
= gen_reg_rtx (Pmode
);
5571 emit_move_insn (reg
, new);
5577 if (GET_CODE (addr
) == CONST
)
5579 addr
= XEXP (addr
, 0);
5581 /* We must match stuff we generate before. Assume the only
5582 unspecs that can get here are ours. Not that we could do
5583 anything with them anyway... */
5584 if (GET_CODE (addr
) == UNSPEC
5585 || (GET_CODE (addr
) == PLUS
5586 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5588 if (GET_CODE (addr
) != PLUS
)
5591 if (GET_CODE (addr
) == PLUS
)
5593 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5595 /* Check first to see if this is a constant offset from a @GOTOFF
5596 symbol reference. */
5597 if (local_symbolic_operand (op0
, Pmode
)
5598 && GET_CODE (op1
) == CONST_INT
)
5602 if (reload_in_progress
)
5603 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5604 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5606 new = gen_rtx_PLUS (Pmode
, new, op1
);
5607 new = gen_rtx_CONST (Pmode
, new);
5608 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5612 emit_move_insn (reg
, new);
5618 if (INTVAL (op1
) < -16*1024*1024
5619 || INTVAL (op1
) >= 16*1024*1024)
5620 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
5625 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5626 new = legitimize_pic_address (XEXP (addr
, 1),
5627 base
== reg
? NULL_RTX
: reg
);
5629 if (GET_CODE (new) == CONST_INT
)
5630 new = plus_constant (base
, INTVAL (new));
5633 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5635 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5636 new = XEXP (new, 1);
5638 new = gen_rtx_PLUS (Pmode
, base
, new);
5647 ix86_encode_section_info (decl
, first
)
5649 int first ATTRIBUTE_UNUSED
;
5651 bool local_p
= (*targetm
.binds_local_p
) (decl
);
5654 rtl
= DECL_P (decl
) ? DECL_RTL (decl
) : TREE_CST_RTL (decl
);
5655 if (GET_CODE (rtl
) != MEM
)
5657 symbol
= XEXP (rtl
, 0);
5658 if (GET_CODE (symbol
) != SYMBOL_REF
)
5661 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5662 symbol so that we may access it directly in the GOT. */
5665 SYMBOL_REF_FLAG (symbol
) = local_p
;
5667 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5668 "local dynamic", "initial exec" or "local exec" TLS models
5671 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL (decl
))
5673 const char *symbol_str
;
5676 enum tls_model kind
= decl_tls_model (decl
);
5678 if (TARGET_64BIT
&& ! flag_pic
)
5680 /* x86-64 doesn't allow non-pic code for shared libraries,
5681 so don't generate GD/LD TLS models for non-pic code. */
5684 case TLS_MODEL_GLOBAL_DYNAMIC
:
5685 kind
= TLS_MODEL_INITIAL_EXEC
; break;
5686 case TLS_MODEL_LOCAL_DYNAMIC
:
5687 kind
= TLS_MODEL_LOCAL_EXEC
; break;
5693 symbol_str
= XSTR (symbol
, 0);
5695 if (symbol_str
[0] == '%')
5697 if (symbol_str
[1] == tls_model_chars
[kind
])
5701 len
= strlen (symbol_str
) + 1;
5702 newstr
= alloca (len
+ 2);
5705 newstr
[1] = tls_model_chars
[kind
];
5706 memcpy (newstr
+ 2, symbol_str
, len
);
5708 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
+ 2 - 1);
/* Undo the above when printing symbol names: skip the two-character
   "%<model>" TLS prefix added by ix86_encode_section_info, and the
   assembler's '*' no-prefix marker.  */

const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  if (*str == '%')
    str += 2;
  if (*str == '*')
    str += 1;
  return str;
}
5725 /* Load the thread pointer into a register. */
5728 get_thread_pointer ()
5732 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
5733 tp
= gen_rtx_MEM (Pmode
, tp
);
5734 RTX_UNCHANGING_P (tp
) = 1;
5735 set_mem_alias_set (tp
, ix86_GOT_alias_set ());
5736 tp
= force_reg (Pmode
, tp
);
5741 /* Try machine-dependent ways of modifying an illegitimate address
5742 to be legitimate. If we find one, return the new, valid address.
5743 This macro is used in only one place: `memory_address' in explow.c.
5745 OLDX is the address as it was before break_out_memory_refs was called.
5746 In some cases it is useful to look at this to decide what needs to be done.
5748 MODE and WIN are passed so that this macro can use
5749 GO_IF_LEGITIMATE_ADDRESS.
5751 It is always safe for this macro to do nothing. It exists to recognize
5752 opportunities to optimize the output.
5754 For the 80386, we handle X+REG by loading X into a register R and
5755 using R+REG. R will go in a general reg and indexing will be used.
5756 However, if REG is a broken-out memory address or multiplication,
5757 nothing needs to be done because REG can certainly go in a general reg.
5759 When -fpic is used, special handling is needed for symbolic references.
5760 See comments by legitimize_pic_address in i386.c for details. */
5763 legitimize_address (x
, oldx
, mode
)
5765 register rtx oldx ATTRIBUTE_UNUSED
;
5766 enum machine_mode mode
;
5771 if (TARGET_DEBUG_ADDR
)
5773 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5774 GET_MODE_NAME (mode
));
5778 log
= tls_symbolic_operand (x
, mode
);
5781 rtx dest
, base
, off
, pic
;
5786 case TLS_MODEL_GLOBAL_DYNAMIC
:
5787 dest
= gen_reg_rtx (Pmode
);
5790 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
5793 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
5794 insns
= get_insns ();
5797 emit_libcall_block (insns
, dest
, rax
, x
);
5800 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
5803 case TLS_MODEL_LOCAL_DYNAMIC
:
5804 base
= gen_reg_rtx (Pmode
);
5807 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
5810 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
5811 insns
= get_insns ();
5814 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
5815 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
5816 emit_libcall_block (insns
, base
, rax
, note
);
5819 emit_insn (gen_tls_local_dynamic_base_32 (base
));
5821 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5822 off
= gen_rtx_CONST (Pmode
, off
);
5824 return gen_rtx_PLUS (Pmode
, base
, off
);
5826 case TLS_MODEL_INITIAL_EXEC
:
5830 type
= UNSPEC_GOTNTPOFF
;
5834 if (reload_in_progress
)
5835 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5836 pic
= pic_offset_table_rtx
;
5837 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
5839 else if (!TARGET_GNU_TLS
)
5841 pic
= gen_reg_rtx (Pmode
);
5842 emit_insn (gen_set_got (pic
));
5843 type
= UNSPEC_GOTTPOFF
;
5848 type
= UNSPEC_INDNTPOFF
;
5851 base
= get_thread_pointer ();
5853 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
5854 off
= gen_rtx_CONST (Pmode
, off
);
5856 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5857 off
= gen_rtx_MEM (Pmode
, off
);
5858 RTX_UNCHANGING_P (off
) = 1;
5859 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5860 dest
= gen_reg_rtx (Pmode
);
5862 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5864 emit_move_insn (dest
, off
);
5865 return gen_rtx_PLUS (Pmode
, base
, dest
);
5868 emit_insn (gen_subsi3 (dest
, base
, off
));
5871 case TLS_MODEL_LOCAL_EXEC
:
5872 base
= get_thread_pointer ();
5874 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5875 (TARGET_64BIT
|| TARGET_GNU_TLS
)
5876 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5877 off
= gen_rtx_CONST (Pmode
, off
);
5879 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5880 return gen_rtx_PLUS (Pmode
, base
, off
);
5883 dest
= gen_reg_rtx (Pmode
);
5884 emit_insn (gen_subsi3 (dest
, base
, off
));
5895 if (flag_pic
&& SYMBOLIC_CONST (x
))
5896 return legitimize_pic_address (x
, 0);
5898 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5899 if (GET_CODE (x
) == ASHIFT
5900 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5901 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5904 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5905 GEN_INT (1 << log
));
5908 if (GET_CODE (x
) == PLUS
)
5910 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5912 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5913 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5914 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5917 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5918 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5919 GEN_INT (1 << log
));
5922 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5923 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5924 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5927 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5928 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5929 GEN_INT (1 << log
));
5932 /* Put multiply first if it isn't already. */
5933 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5935 rtx tmp
= XEXP (x
, 0);
5936 XEXP (x
, 0) = XEXP (x
, 1);
5941 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5942 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5943 created by virtual register instantiation, register elimination, and
5944 similar optimizations. */
5945 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5948 x
= gen_rtx_PLUS (Pmode
,
5949 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5950 XEXP (XEXP (x
, 1), 0)),
5951 XEXP (XEXP (x
, 1), 1));
5955 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5956 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5957 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5958 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5959 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5960 && CONSTANT_P (XEXP (x
, 1)))
5963 rtx other
= NULL_RTX
;
5965 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5967 constant
= XEXP (x
, 1);
5968 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5970 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5972 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5973 other
= XEXP (x
, 1);
5981 x
= gen_rtx_PLUS (Pmode
,
5982 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5983 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5984 plus_constant (other
, INTVAL (constant
)));
5988 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5991 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5994 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5997 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6000 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6004 && GET_CODE (XEXP (x
, 1)) == REG
6005 && GET_CODE (XEXP (x
, 0)) == REG
)
6008 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6011 x
= legitimize_pic_address (x
, 0);
6014 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6017 if (GET_CODE (XEXP (x
, 0)) == REG
)
6019 register rtx temp
= gen_reg_rtx (Pmode
);
6020 register rtx val
= force_operand (XEXP (x
, 1), temp
);
6022 emit_move_insn (temp
, val
);
6028 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6030 register rtx temp
= gen_reg_rtx (Pmode
);
6031 register rtx val
= force_operand (XEXP (x
, 0), temp
);
6033 emit_move_insn (temp
, val
);
6043 /* Print an integer constant expression in assembler syntax. Addition
6044 and subtraction are the only arithmetic that may appear in these
6045 expressions. FILE is the stdio stream to write to, X is the rtx, and
6046 CODE is the operand print code from the output string. */
6049 output_pic_addr_const (file
, x
, code
)
6056 switch (GET_CODE (x
))
6066 assemble_name (file
, XSTR (x
, 0));
6067 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_FLAG (x
))
6068 fputs ("@PLT", file
);
6075 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6076 assemble_name (asm_out_file
, buf
);
6080 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6084 /* This used to output parentheses around the expression,
6085 but that does not work on the 386 (either ATT or BSD assembler). */
6086 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6090 if (GET_MODE (x
) == VOIDmode
)
6092 /* We can use %d if the number is <32 bits and positive. */
6093 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6094 fprintf (file
, "0x%lx%08lx",
6095 (unsigned long) CONST_DOUBLE_HIGH (x
),
6096 (unsigned long) CONST_DOUBLE_LOW (x
));
6098 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6101 /* We can't handle floating point constants;
6102 PRINT_OPERAND must handle them. */
6103 output_operand_lossage ("floating constant misused");
6107 /* Some assemblers need integer constants to appear first. */
6108 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6110 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6112 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6114 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6116 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6118 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6126 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6127 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6129 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6131 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6135 if (XVECLEN (x
, 0) != 1)
6137 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6138 switch (XINT (x
, 1))
6141 fputs ("@GOT", file
);
6144 fputs ("@GOTOFF", file
);
6146 case UNSPEC_GOTPCREL
:
6147 fputs ("@GOTPCREL(%rip)", file
);
6149 case UNSPEC_GOTTPOFF
:
6150 /* FIXME: This might be @TPOFF in Sun ld too. */
6151 fputs ("@GOTTPOFF", file
);
6154 fputs ("@TPOFF", file
);
6158 fputs ("@TPOFF", file
);
6160 fputs ("@NTPOFF", file
);
6163 fputs ("@DTPOFF", file
);
6165 case UNSPEC_GOTNTPOFF
:
6167 fputs ("@GOTTPOFF(%rip)", file
);
6169 fputs ("@GOTNTPOFF", file
);
6171 case UNSPEC_INDNTPOFF
:
6172 fputs ("@INDNTPOFF", file
);
6175 output_operand_lossage ("invalid UNSPEC as operand");
6181 output_operand_lossage ("invalid expression as operand");
6185 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6186 We need to handle our special PIC relocations. */
6189 i386_dwarf_output_addr_const (file
, x
)
6194 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6198 fprintf (file
, "%s", ASM_LONG
);
6201 output_pic_addr_const (file
, x
, '\0');
6203 output_addr_const (file
, x
);
6207 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6208 We need to emit DTP-relative relocations. */
6211 i386_output_dwarf_dtprel (file
, size
, x
)
6216 fputs (ASM_LONG
, file
);
6217 output_addr_const (file
, x
);
6218 fputs ("@DTPOFF", file
);
6224 fputs (", 0", file
);
6231 /* In the name of slightly smaller debug output, and to cater to
6232 general assembler losage, recognize PIC+GOTOFF and turn it back
6233 into a direct symbol reference. */
6236 i386_simplify_dwarf_addr (orig_x
)
6241 if (GET_CODE (x
) == MEM
)
6246 if (GET_CODE (x
) != CONST
6247 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6248 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6249 || GET_CODE (orig_x
) != MEM
)
6251 return XVECEXP (XEXP (x
, 0), 0, 0);
6254 if (GET_CODE (x
) != PLUS
6255 || GET_CODE (XEXP (x
, 1)) != CONST
)
6258 if (GET_CODE (XEXP (x
, 0)) == REG
6259 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6260 /* %ebx + GOT/GOTOFF */
6262 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6264 /* %ebx + %reg * scale + GOT/GOTOFF */
6266 if (GET_CODE (XEXP (y
, 0)) == REG
6267 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6269 else if (GET_CODE (XEXP (y
, 1)) == REG
6270 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6274 if (GET_CODE (y
) != REG
6275 && GET_CODE (y
) != MULT
6276 && GET_CODE (y
) != ASHIFT
)
6282 x
= XEXP (XEXP (x
, 1), 0);
6283 if (GET_CODE (x
) == UNSPEC
6284 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6285 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6288 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6289 return XVECEXP (x
, 0, 0);
6292 if (GET_CODE (x
) == PLUS
6293 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6294 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6295 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6296 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6297 && GET_CODE (orig_x
) != MEM
)))
6299 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6301 return gen_rtx_PLUS (Pmode
, y
, x
);
6309 put_condition_code (code
, mode
, reverse
, fp
, file
)
6311 enum machine_mode mode
;
6317 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6319 enum rtx_code second_code
, bypass_code
;
6320 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6321 if (bypass_code
!= NIL
|| second_code
!= NIL
)
6323 code
= ix86_fp_compare_code_to_integer (code
);
6327 code
= reverse_condition (code
);
6338 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6343 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6344 Those same assemblers have the same but opposite losage on cmov. */
6347 suffix
= fp
? "nbe" : "a";
6350 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6352 else if (mode
== CCmode
|| mode
== CCGCmode
)
6363 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6365 else if (mode
== CCmode
|| mode
== CCGCmode
)
6374 suffix
= fp
? "nb" : "ae";
6377 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6387 suffix
= fp
? "u" : "p";
6390 suffix
= fp
? "nu" : "np";
6395 fputs (suffix
, file
);
6399 print_reg (x
, code
, file
)
6404 if (REGNO (x
) == ARG_POINTER_REGNUM
6405 || REGNO (x
) == FRAME_POINTER_REGNUM
6406 || REGNO (x
) == FLAGS_REG
6407 || REGNO (x
) == FPSR_REG
)
6410 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6413 if (code
== 'w' || MMX_REG_P (x
))
6415 else if (code
== 'b')
6417 else if (code
== 'k')
6419 else if (code
== 'q')
6421 else if (code
== 'y')
6423 else if (code
== 'h')
6426 code
= GET_MODE_SIZE (GET_MODE (x
));
6428 /* Irritatingly, AMD extended registers use different naming convention
6429 from the normal registers. */
6430 if (REX_INT_REG_P (x
))
6437 error ("extended registers have no high halves");
6440 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6443 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6446 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6449 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6452 error ("unsupported operand size for extended register");
6460 if (STACK_TOP_P (x
))
6462 fputs ("st(0)", file
);
6469 if (! ANY_FP_REG_P (x
))
6470 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6474 fputs (hi_reg_name
[REGNO (x
)], file
);
6477 fputs (qi_reg_name
[REGNO (x
)], file
);
6480 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6487 /* Locate some local-dynamic symbol still in use by this function
6488 so that we can print its name in some tls_local_dynamic_base
6492 get_some_local_dynamic_name ()
6496 if (cfun
->machine
->some_ld_name
)
6497 return cfun
->machine
->some_ld_name
;
6499 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6501 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6502 return cfun
->machine
->some_ld_name
;
6508 get_some_local_dynamic_name_1 (px
, data
)
6510 void *data ATTRIBUTE_UNUSED
;
6514 if (GET_CODE (x
) == SYMBOL_REF
6515 && local_dynamic_symbolic_operand (x
, Pmode
))
6517 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6525 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6526 C -- print opcode suffix for set/cmov insn.
6527 c -- like C, but print reversed condition
6528 F,f -- likewise, but for floating-point.
6529 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6531 R -- print the prefix for register names.
6532 z -- print the opcode suffix for the size of the current operand.
6533 * -- print a star (in certain assembler syntax)
6534 A -- print an absolute memory reference.
6535 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6536 s -- print a shift double count, followed by the assemblers argument
6538 b -- print the QImode name of the register for the indicated operand.
6539 %b0 would print %al if operands[0] is reg 0.
6540 w -- likewise, print the HImode name of the register.
6541 k -- likewise, print the SImode name of the register.
6542 q -- likewise, print the DImode name of the register.
6543 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6544 y -- print "st(0)" instead of "st" as a register.
6545 D -- print condition for SSE cmp instruction.
6546 P -- if PIC, print an @PLT suffix.
6547 X -- don't print any sort of PIC '@' suffix for a symbol.
6548 & -- print some in-use local-dynamic symbol name.
6552 print_operand (file
, x
, code
)
6562 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6567 assemble_name (file
, get_some_local_dynamic_name ());
6571 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6573 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6575 /* Intel syntax. For absolute addresses, registers should not
6576 be surrounded by braces. */
6577 if (GET_CODE (x
) != REG
)
6580 PRINT_OPERAND (file
, x
, 0);
6588 PRINT_OPERAND (file
, x
, 0);
6593 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6598 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6603 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6608 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6613 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6618 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6623 /* 387 opcodes don't get size suffixes if the operands are
6625 if (STACK_REG_P (x
))
6628 /* Likewise if using Intel opcodes. */
6629 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6632 /* This is the size of op from size of operand. */
6633 switch (GET_MODE_SIZE (GET_MODE (x
)))
6636 #ifdef HAVE_GAS_FILDS_FISTS
6642 if (GET_MODE (x
) == SFmode
)
6657 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6659 #ifdef GAS_MNEMONICS
6685 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6687 PRINT_OPERAND (file
, x
, 0);
6693 /* Little bit of braindamage here. The SSE compare instructions
6694 does use completely different names for the comparisons that the
6695 fp conditional moves. */
6696 switch (GET_CODE (x
))
6711 fputs ("unord", file
);
6715 fputs ("neq", file
);
6719 fputs ("nlt", file
);
6723 fputs ("nle", file
);
6726 fputs ("ord", file
);
6734 #ifdef CMOV_SUN_AS_SYNTAX
6735 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6737 switch (GET_MODE (x
))
6739 case HImode
: putc ('w', file
); break;
6741 case SFmode
: putc ('l', file
); break;
6743 case DFmode
: putc ('q', file
); break;
6751 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6754 #ifdef CMOV_SUN_AS_SYNTAX
6755 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6758 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6761 /* Like above, but reverse condition */
6763 /* Check to see if argument to %c is really a constant
6764 and not a condition code which needs to be reversed. */
6765 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
6767 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6770 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6773 #ifdef CMOV_SUN_AS_SYNTAX
6774 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6777 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6783 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6786 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6789 int pred_val
= INTVAL (XEXP (x
, 0));
6791 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6792 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6794 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6795 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6797 /* Emit hints only in the case default branch prediction
6798 heruistics would fail. */
6799 if (taken
!= cputaken
)
6801 /* We use 3e (DS) prefix for taken branches and
6802 2e (CS) prefix for not taken branches. */
6804 fputs ("ds ; ", file
);
6806 fputs ("cs ; ", file
);
6813 output_operand_lossage ("invalid operand code `%c'", code
);
6817 if (GET_CODE (x
) == REG
)
6819 PRINT_REG (x
, code
, file
);
6822 else if (GET_CODE (x
) == MEM
)
6824 /* No `byte ptr' prefix for call instructions. */
6825 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6828 switch (GET_MODE_SIZE (GET_MODE (x
)))
6830 case 1: size
= "BYTE"; break;
6831 case 2: size
= "WORD"; break;
6832 case 4: size
= "DWORD"; break;
6833 case 8: size
= "QWORD"; break;
6834 case 12: size
= "XWORD"; break;
6835 case 16: size
= "XMMWORD"; break;
6840 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6843 else if (code
== 'w')
6845 else if (code
== 'k')
6849 fputs (" PTR ", file
);
6853 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
6854 output_pic_addr_const (file
, x
, code
);
6855 /* Avoid (%rip) for call operands. */
6856 else if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6857 && GET_CODE (x
) != CONST_INT
)
6858 output_addr_const (file
, x
);
6859 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6860 output_operand_lossage ("invalid constraints for operand");
6865 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6870 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6871 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6873 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6875 fprintf (file
, "0x%lx", l
);
6878 /* These float cases don't actually occur as immediate operands. */
6879 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6883 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6884 fprintf (file
, "%s", dstr
);
6887 else if (GET_CODE (x
) == CONST_DOUBLE
6888 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
6892 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6893 fprintf (file
, "%s", dstr
);
6900 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6902 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6905 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6906 || GET_CODE (x
) == LABEL_REF
)
6908 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6911 fputs ("OFFSET FLAT:", file
);
6914 if (GET_CODE (x
) == CONST_INT
)
6915 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6917 output_pic_addr_const (file
, x
, code
);
6919 output_addr_const (file
, x
);
6923 /* Print a memory operand whose address is ADDR. */
6926 print_operand_address (file
, addr
)
6930 struct ix86_address parts
;
6931 rtx base
, index
, disp
;
6934 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
6936 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6937 fputs ("DWORD PTR ", file
);
6938 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6941 fputs ("fs:0", file
);
6943 fputs ("gs:0", file
);
6947 if (! ix86_decompose_address (addr
, &parts
))
6951 index
= parts
.index
;
6953 scale
= parts
.scale
;
6955 if (!base
&& !index
)
6957 /* Displacement only requires special attention. */
6959 if (GET_CODE (disp
) == CONST_INT
)
6961 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6963 if (USER_LABEL_PREFIX
[0] == 0)
6965 fputs ("ds:", file
);
6967 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
6970 output_pic_addr_const (file
, addr
, 0);
6972 output_addr_const (file
, addr
);
6974 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6976 && ((GET_CODE (addr
) == SYMBOL_REF
6977 && ! tls_symbolic_operand (addr
, GET_MODE (addr
)))
6978 || GET_CODE (addr
) == LABEL_REF
6979 || (GET_CODE (addr
) == CONST
6980 && GET_CODE (XEXP (addr
, 0)) == PLUS
6981 && (GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
6982 || GET_CODE (XEXP (XEXP (addr
, 0), 0)) == LABEL_REF
)
6983 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)))
6984 fputs ("(%rip)", file
);
6988 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6993 output_pic_addr_const (file
, disp
, 0);
6994 else if (GET_CODE (disp
) == LABEL_REF
)
6995 output_asm_label (disp
);
6997 output_addr_const (file
, disp
);
7002 PRINT_REG (base
, 0, file
);
7006 PRINT_REG (index
, 0, file
);
7008 fprintf (file
, ",%d", scale
);
7014 rtx offset
= NULL_RTX
;
7018 /* Pull out the offset of a symbol; print any symbol itself. */
7019 if (GET_CODE (disp
) == CONST
7020 && GET_CODE (XEXP (disp
, 0)) == PLUS
7021 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7023 offset
= XEXP (XEXP (disp
, 0), 1);
7024 disp
= gen_rtx_CONST (VOIDmode
,
7025 XEXP (XEXP (disp
, 0), 0));
7029 output_pic_addr_const (file
, disp
, 0);
7030 else if (GET_CODE (disp
) == LABEL_REF
)
7031 output_asm_label (disp
);
7032 else if (GET_CODE (disp
) == CONST_INT
)
7035 output_addr_const (file
, disp
);
7041 PRINT_REG (base
, 0, file
);
7044 if (INTVAL (offset
) >= 0)
7046 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7050 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7057 PRINT_REG (index
, 0, file
);
7059 fprintf (file
, "*%d", scale
);
7067 output_addr_const_extra (file
, x
)
7073 if (GET_CODE (x
) != UNSPEC
)
7076 op
= XVECEXP (x
, 0, 0);
7077 switch (XINT (x
, 1))
7079 case UNSPEC_GOTTPOFF
:
7080 output_addr_const (file
, op
);
7081 /* FIXME: This might be @TPOFF in Sun ld. */
7082 fputs ("@GOTTPOFF", file
);
7085 output_addr_const (file
, op
);
7086 fputs ("@TPOFF", file
);
7089 output_addr_const (file
, op
);
7091 fputs ("@TPOFF", file
);
7093 fputs ("@NTPOFF", file
);
7096 output_addr_const (file
, op
);
7097 fputs ("@DTPOFF", file
);
7099 case UNSPEC_GOTNTPOFF
:
7100 output_addr_const (file
, op
);
7102 fputs ("@GOTTPOFF(%rip)", file
);
7104 fputs ("@GOTNTPOFF", file
);
7106 case UNSPEC_INDNTPOFF
:
7107 output_addr_const (file
, op
);
7108 fputs ("@INDNTPOFF", file
);
7118 /* Split one or more DImode RTL references into pairs of SImode
7119 references. The RTL can be REG, offsettable MEM, integer constant, or
7120 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7121 split and "num" is its length. lo_half and hi_half are output arrays
7122 that parallel "operands". */
7125 split_di (operands
, num
, lo_half
, hi_half
)
7128 rtx lo_half
[], hi_half
[];
7132 rtx op
= operands
[num
];
7134 /* simplify_subreg refuse to split volatile memory addresses,
7135 but we still have to handle it. */
7136 if (GET_CODE (op
) == MEM
)
7138 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7139 hi_half
[num
] = adjust_address (op
, SImode
, 4);
7143 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7144 GET_MODE (op
) == VOIDmode
7145 ? DImode
: GET_MODE (op
), 0);
7146 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7147 GET_MODE (op
) == VOIDmode
7148 ? DImode
: GET_MODE (op
), 4);
7152 /* Split one or more TImode RTL references into pairs of SImode
7153 references. The RTL can be REG, offsettable MEM, integer constant, or
7154 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7155 split and "num" is its length. lo_half and hi_half are output arrays
7156 that parallel "operands". */
7159 split_ti (operands
, num
, lo_half
, hi_half
)
7162 rtx lo_half
[], hi_half
[];
7166 rtx op
= operands
[num
];
7168 /* simplify_subreg refuse to split volatile memory addresses, but we
7169 still have to handle it. */
7170 if (GET_CODE (op
) == MEM
)
7172 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7173 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7177 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7178 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7183 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7184 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7185 is the expression of the binary operation. The output may either be
7186 emitted here, or returned to the caller, like all output_* functions.
7188 There is no guarantee that the operands are the same mode, as they
7189 might be within FLOAT or FLOAT_EXTEND expressions. */
7191 #ifndef SYSV386_COMPAT
7192 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7193 wants to fix the assemblers because that causes incompatibility
7194 with gcc. No-one wants to fix gcc because that causes
7195 incompatibility with assemblers... You can use the option of
7196 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7197 #define SYSV386_COMPAT 1
7201 output_387_binary_op (insn
, operands
)
7205 static char buf
[30];
7208 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7210 #ifdef ENABLE_CHECKING
7211 /* Even if we do not want to check the inputs, this documents input
7212 constraints. Which helps in understanding the following code. */
7213 if (STACK_REG_P (operands
[0])
7214 && ((REG_P (operands
[1])
7215 && REGNO (operands
[0]) == REGNO (operands
[1])
7216 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7217 || (REG_P (operands
[2])
7218 && REGNO (operands
[0]) == REGNO (operands
[2])
7219 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7220 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7226 switch (GET_CODE (operands
[3]))
7229 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7230 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7238 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7239 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7247 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7248 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7256 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7257 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7271 if (GET_MODE (operands
[0]) == SFmode
)
7272 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7274 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7279 switch (GET_CODE (operands
[3]))
7283 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7285 rtx temp
= operands
[2];
7286 operands
[2] = operands
[1];
7290 /* know operands[0] == operands[1]. */
7292 if (GET_CODE (operands
[2]) == MEM
)
7298 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7300 if (STACK_TOP_P (operands
[0]))
7301 /* How is it that we are storing to a dead operand[2]?
7302 Well, presumably operands[1] is dead too. We can't
7303 store the result to st(0) as st(0) gets popped on this
7304 instruction. Instead store to operands[2] (which I
7305 think has to be st(1)). st(1) will be popped later.
7306 gcc <= 2.8.1 didn't have this check and generated
7307 assembly code that the Unixware assembler rejected. */
7308 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7310 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7314 if (STACK_TOP_P (operands
[0]))
7315 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7317 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7322 if (GET_CODE (operands
[1]) == MEM
)
7328 if (GET_CODE (operands
[2]) == MEM
)
7334 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7337 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7338 derived assemblers, confusingly reverse the direction of
7339 the operation for fsub{r} and fdiv{r} when the
7340 destination register is not st(0). The Intel assembler
7341 doesn't have this brain damage. Read !SYSV386_COMPAT to
7342 figure out what the hardware really does. */
7343 if (STACK_TOP_P (operands
[0]))
7344 p
= "{p\t%0, %2|rp\t%2, %0}";
7346 p
= "{rp\t%2, %0|p\t%0, %2}";
7348 if (STACK_TOP_P (operands
[0]))
7349 /* As above for fmul/fadd, we can't store to st(0). */
7350 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7352 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7357 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7360 if (STACK_TOP_P (operands
[0]))
7361 p
= "{rp\t%0, %1|p\t%1, %0}";
7363 p
= "{p\t%1, %0|rp\t%0, %1}";
7365 if (STACK_TOP_P (operands
[0]))
7366 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7368 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7373 if (STACK_TOP_P (operands
[0]))
7375 if (STACK_TOP_P (operands
[1]))
7376 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7378 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7381 else if (STACK_TOP_P (operands
[1]))
7384 p
= "{\t%1, %0|r\t%0, %1}";
7386 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7392 p
= "{r\t%2, %0|\t%0, %2}";
7394 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7407 /* Output code to initialize control word copies used by
7408 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7409 is set to control word rounding downwards. */
7411 emit_i387_cw_initialization (normal
, round_down
)
7412 rtx normal
, round_down
;
7414 rtx reg
= gen_reg_rtx (HImode
);
7416 emit_insn (gen_x86_fnstcw_1 (normal
));
7417 emit_move_insn (reg
, normal
);
7418 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7420 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7422 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
7423 emit_move_insn (round_down
, reg
);
7426 /* Output code for INSN to convert a float to a signed int. OPERANDS
7427 are the insn operands. The output may be [HSD]Imode and the input
7428 operand may be [SDX]Fmode. */
7431 output_fix_trunc (insn
, operands
)
7435 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7436 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7438 /* Jump through a hoop or two for DImode, since the hardware has no
7439 non-popping instruction. We used to do this a different way, but
7440 that was somewhat fragile and broke with post-reload splitters. */
7441 if (dimode_p
&& !stack_top_dies
)
7442 output_asm_insn ("fld\t%y1", operands
);
7444 if (!STACK_TOP_P (operands
[1]))
7447 if (GET_CODE (operands
[0]) != MEM
)
7450 output_asm_insn ("fldcw\t%3", operands
);
7451 if (stack_top_dies
|| dimode_p
)
7452 output_asm_insn ("fistp%z0\t%0", operands
);
7454 output_asm_insn ("fist%z0\t%0", operands
);
7455 output_asm_insn ("fldcw\t%2", operands
);
7460 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7461 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7462 when fucom should be used. */
7465 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
7468 int eflags_p
, unordered_p
;
7471 rtx cmp_op0
= operands
[0];
7472 rtx cmp_op1
= operands
[1];
7473 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7478 cmp_op1
= operands
[2];
7482 if (GET_MODE (operands
[0]) == SFmode
)
7484 return "ucomiss\t{%1, %0|%0, %1}";
7486 return "comiss\t{%1, %0|%0, %y}";
7489 return "ucomisd\t{%1, %0|%0, %1}";
7491 return "comisd\t{%1, %0|%0, %y}";
7494 if (! STACK_TOP_P (cmp_op0
))
7497 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7499 if (STACK_REG_P (cmp_op1
)
7501 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7502 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7504 /* If both the top of the 387 stack dies, and the other operand
7505 is also a stack register that dies, then this must be a
7506 `fcompp' float compare */
7510 /* There is no double popping fcomi variant. Fortunately,
7511 eflags is immune from the fstp's cc clobbering. */
7513 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7515 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7523 return "fucompp\n\tfnstsw\t%0";
7525 return "fcompp\n\tfnstsw\t%0";
7538 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7540 static const char * const alt
[24] =
7552 "fcomi\t{%y1, %0|%0, %y1}",
7553 "fcomip\t{%y1, %0|%0, %y1}",
7554 "fucomi\t{%y1, %0|%0, %y1}",
7555 "fucomip\t{%y1, %0|%0, %y1}",
7562 "fcom%z2\t%y2\n\tfnstsw\t%0",
7563 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7564 "fucom%z2\t%y2\n\tfnstsw\t%0",
7565 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7567 "ficom%z2\t%y2\n\tfnstsw\t%0",
7568 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7576 mask
= eflags_p
<< 3;
7577 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7578 mask
|= unordered_p
<< 1;
7579 mask
|= stack_top_dies
;
7592 ix86_output_addr_vec_elt (file
, value
)
7596 const char *directive
= ASM_LONG
;
7601 directive
= ASM_QUAD
;
7607 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7611 ix86_output_addr_diff_elt (file
, value
, rel
)
7616 fprintf (file
, "%s%s%d-%s%d\n",
7617 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7618 else if (HAVE_AS_GOTOFF_IN_DATA
)
7619 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7621 else if (TARGET_MACHO
)
7622 fprintf (file
, "%s%s%d-%s\n", ASM_LONG
, LPREFIX
, value
,
7623 machopic_function_base_name () + 1);
7626 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7627 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7630 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7634 ix86_expand_clear (dest
)
7639 /* We play register width games, which are only valid after reload. */
7640 if (!reload_completed
)
7643 /* Avoid HImode and its attendant prefix byte. */
7644 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7645 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7647 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7649 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7650 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7652 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7653 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7659 /* X is an unchanging MEM. If it is a constant pool reference, return
7660 the constant pool rtx, else NULL. */
7663 maybe_get_pool_constant (x
)
7668 if (flag_pic
&& ! TARGET_64BIT
)
7670 if (GET_CODE (x
) != PLUS
)
7672 if (XEXP (x
, 0) != pic_offset_table_rtx
)
7675 if (GET_CODE (x
) != CONST
)
7678 if (GET_CODE (x
) != UNSPEC
)
7680 if (XINT (x
, 1) != UNSPEC_GOTOFF
)
7682 x
= XVECEXP (x
, 0, 0);
7685 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7686 return get_pool_constant (x
);
7692 ix86_expand_move (mode
, operands
)
7693 enum machine_mode mode
;
7696 int strict
= (reload_in_progress
|| reload_completed
);
7697 rtx insn
, op0
, op1
, tmp
;
7702 /* ??? We have a slight problem. We need to say that tls symbols are
7703 not legitimate constants so that reload does not helpfully reload
7704 these constants from a REG_EQUIV, which we cannot handle. (Recall
7705 that general- and local-dynamic address resolution requires a
7708 However, if we say that tls symbols are not legitimate constants,
7709 then emit_move_insn helpfully drop them into the constant pool.
7711 It is far easier to work around emit_move_insn than reload. Recognize
7712 the MEM that we would have created and extract the symbol_ref. */
7715 && GET_CODE (op1
) == MEM
7716 && RTX_UNCHANGING_P (op1
))
7718 tmp
= maybe_get_pool_constant (op1
);
7719 /* Note that we only care about symbolic constants here, which
7720 unlike CONST_INT will always have a proper mode. */
7721 if (tmp
&& GET_MODE (tmp
) == Pmode
)
7725 if (tls_symbolic_operand (op1
, Pmode
))
7727 op1
= legitimize_address (op1
, op1
, VOIDmode
);
7728 if (GET_CODE (op0
) == MEM
)
7730 tmp
= gen_reg_rtx (mode
);
7731 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, op1
));
7735 else if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7740 rtx temp
= ((reload_in_progress
7741 || ((op0
&& GET_CODE (op0
) == REG
)
7743 ? op0
: gen_reg_rtx (Pmode
));
7744 op1
= machopic_indirect_data_reference (op1
, temp
);
7745 op1
= machopic_legitimize_pic_address (op1
, mode
,
7746 temp
== op1
? 0 : temp
);
7750 if (MACHOPIC_INDIRECT
)
7751 op1
= machopic_indirect_data_reference (op1
, 0);
7755 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7759 #endif /* TARGET_MACHO */
7760 if (GET_CODE (op0
) == MEM
)
7761 op1
= force_reg (Pmode
, op1
);
7765 if (GET_CODE (temp
) != REG
)
7766 temp
= gen_reg_rtx (Pmode
);
7767 temp
= legitimize_pic_address (op1
, temp
);
7775 if (GET_CODE (op0
) == MEM
7776 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7777 || !push_operand (op0
, mode
))
7778 && GET_CODE (op1
) == MEM
)
7779 op1
= force_reg (mode
, op1
);
7781 if (push_operand (op0
, mode
)
7782 && ! general_no_elim_operand (op1
, mode
))
7783 op1
= copy_to_mode_reg (mode
, op1
);
7785 /* Force large constants in 64bit compilation into register
7786 to get them CSEed. */
7787 if (TARGET_64BIT
&& mode
== DImode
7788 && immediate_operand (op1
, mode
)
7789 && !x86_64_zero_extended_value (op1
)
7790 && !register_operand (op0
, mode
)
7791 && optimize
&& !reload_completed
&& !reload_in_progress
)
7792 op1
= copy_to_mode_reg (mode
, op1
);
7794 if (FLOAT_MODE_P (mode
))
7796 /* If we are loading a floating point constant to a register,
7797 force the value to memory now, since we'll get better code
7798 out the back end. */
7802 else if (GET_CODE (op1
) == CONST_DOUBLE
7803 && register_operand (op0
, mode
))
7804 op1
= validize_mem (force_const_mem (mode
, op1
));
7808 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7814 ix86_expand_vector_move (mode
, operands
)
7815 enum machine_mode mode
;
7818 /* Force constants other than zero into memory. We do not know how
7819 the instructions used to build constants modify the upper 64 bits
7820 of the register, once we have that information we may be able
7821 to handle some of them more efficiently. */
7822 if ((reload_in_progress
| reload_completed
) == 0
7823 && register_operand (operands
[0], mode
)
7824 && CONSTANT_P (operands
[1]))
7825 operands
[1] = force_const_mem (mode
, operands
[1]);
7827 /* Make operand1 a register if it isn't already. */
7829 && !register_operand (operands
[0], mode
)
7830 && !register_operand (operands
[1], mode
))
7832 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7833 emit_move_insn (operands
[0], temp
);
7837 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
7840 /* Attempt to expand a binary operator. Make the expansion closer to the
7841 actual machine, then just general_operand, which will allow 3 separate
7842 memory references (one output, two input) in a single insn. */
7845 ix86_expand_binary_operator (code
, mode
, operands
)
7847 enum machine_mode mode
;
7850 int matching_memory
;
7851 rtx src1
, src2
, dst
, op
, clob
;
7857 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7858 if (GET_RTX_CLASS (code
) == 'c'
7859 && (rtx_equal_p (dst
, src2
)
7860 || immediate_operand (src1
, mode
)))
7867 /* If the destination is memory, and we do not have matching source
7868 operands, do things in registers. */
7869 matching_memory
= 0;
7870 if (GET_CODE (dst
) == MEM
)
7872 if (rtx_equal_p (dst
, src1
))
7873 matching_memory
= 1;
7874 else if (GET_RTX_CLASS (code
) == 'c'
7875 && rtx_equal_p (dst
, src2
))
7876 matching_memory
= 2;
7878 dst
= gen_reg_rtx (mode
);
7881 /* Both source operands cannot be in memory. */
7882 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7884 if (matching_memory
!= 2)
7885 src2
= force_reg (mode
, src2
);
7887 src1
= force_reg (mode
, src1
);
7890 /* If the operation is not commutable, source 1 cannot be a constant
7891 or non-matching memory. */
7892 if ((CONSTANT_P (src1
)
7893 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7894 && GET_RTX_CLASS (code
) != 'c')
7895 src1
= force_reg (mode
, src1
);
7897 /* If optimizing, copy to regs to improve CSE */
7898 if (optimize
&& ! no_new_pseudos
)
7900 if (GET_CODE (dst
) == MEM
)
7901 dst
= gen_reg_rtx (mode
);
7902 if (GET_CODE (src1
) == MEM
)
7903 src1
= force_reg (mode
, src1
);
7904 if (GET_CODE (src2
) == MEM
)
7905 src2
= force_reg (mode
, src2
);
7908 /* Emit the instruction. */
7910 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7911 if (reload_in_progress
)
7913 /* Reload doesn't know about the flags register, and doesn't know that
7914 it doesn't want to clobber it. We can only do this with PLUS. */
7921 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7922 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7925 /* Fix up the destination if needed. */
7926 if (dst
!= operands
[0])
7927 emit_move_insn (operands
[0], dst
);
7930 /* Return TRUE or FALSE depending on whether the binary operator meets the
7931 appropriate constraints. */
7934 ix86_binary_operator_ok (code
, mode
, operands
)
7936 enum machine_mode mode ATTRIBUTE_UNUSED
;
7939 /* Both source operands cannot be in memory. */
7940 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7942 /* If the operation is not commutable, source 1 cannot be a constant. */
7943 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
7945 /* If the destination is memory, we must have a matching source operand. */
7946 if (GET_CODE (operands
[0]) == MEM
7947 && ! (rtx_equal_p (operands
[0], operands
[1])
7948 || (GET_RTX_CLASS (code
) == 'c'
7949 && rtx_equal_p (operands
[0], operands
[2]))))
7951 /* If the operation is not commutable and the source 1 is memory, we must
7952 have a matching destination. */
7953 if (GET_CODE (operands
[1]) == MEM
7954 && GET_RTX_CLASS (code
) != 'c'
7955 && ! rtx_equal_p (operands
[0], operands
[1]))
7960 /* Attempt to expand a unary operator. Make the expansion closer to the
7961 actual machine, then just general_operand, which will allow 2 separate
7962 memory references (one output, one input) in a single insn. */
7965 ix86_expand_unary_operator (code
, mode
, operands
)
7967 enum machine_mode mode
;
7970 int matching_memory
;
7971 rtx src
, dst
, op
, clob
;
7976 /* If the destination is memory, and we do not have matching source
7977 operands, do things in registers. */
7978 matching_memory
= 0;
7979 if (GET_CODE (dst
) == MEM
)
7981 if (rtx_equal_p (dst
, src
))
7982 matching_memory
= 1;
7984 dst
= gen_reg_rtx (mode
);
7987 /* When source operand is memory, destination must match. */
7988 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7989 src
= force_reg (mode
, src
);
7991 /* If optimizing, copy to regs to improve CSE */
7992 if (optimize
&& ! no_new_pseudos
)
7994 if (GET_CODE (dst
) == MEM
)
7995 dst
= gen_reg_rtx (mode
);
7996 if (GET_CODE (src
) == MEM
)
7997 src
= force_reg (mode
, src
);
8000 /* Emit the instruction. */
8002 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8003 if (reload_in_progress
|| code
== NOT
)
8005 /* Reload doesn't know about the flags register, and doesn't know that
8006 it doesn't want to clobber it. */
8013 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8014 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8017 /* Fix up the destination if needed. */
8018 if (dst
!= operands
[0])
8019 emit_move_insn (operands
[0], dst
);
8022 /* Return TRUE or FALSE depending on whether the unary operator meets the
8023 appropriate constraints. */
8026 ix86_unary_operator_ok (code
, mode
, operands
)
8027 enum rtx_code code ATTRIBUTE_UNUSED
;
8028 enum machine_mode mode ATTRIBUTE_UNUSED
;
8029 rtx operands
[2] ATTRIBUTE_UNUSED
;
8031 /* If one of operands is memory, source and destination must match. */
8032 if ((GET_CODE (operands
[0]) == MEM
8033 || GET_CODE (operands
[1]) == MEM
)
8034 && ! rtx_equal_p (operands
[0], operands
[1]))
8039 /* Return TRUE or FALSE depending on whether the first SET in INSN
8040 has source and destination with matching CC modes, and that the
8041 CC mode is at least as constrained as REQ_MODE. */
8044 ix86_match_ccmode (insn
, req_mode
)
8046 enum machine_mode req_mode
;
8049 enum machine_mode set_mode
;
8051 set
= PATTERN (insn
);
8052 if (GET_CODE (set
) == PARALLEL
)
8053 set
= XVECEXP (set
, 0, 0);
8054 if (GET_CODE (set
) != SET
)
8056 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8059 set_mode
= GET_MODE (SET_DEST (set
));
8063 if (req_mode
!= CCNOmode
8064 && (req_mode
!= CCmode
8065 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8069 if (req_mode
== CCGCmode
)
8073 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8077 if (req_mode
== CCZmode
)
8087 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8090 /* Generate insn patterns to do an integer compare of OPERANDS. */
8093 ix86_expand_int_compare (code
, op0
, op1
)
8097 enum machine_mode cmpmode
;
8100 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8101 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8103 /* This is very simple, but making the interface the same as in the
8104 FP case makes the rest of the code easier. */
8105 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8106 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8108 /* Return the test that should be put into the flags user, i.e.
8109 the bcc, scc, or cmov instruction. */
8110 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8113 /* Figure out whether to use ordered or unordered fp comparisons.
8114 Return the appropriate mode to use. */
8117 ix86_fp_compare_mode (code
)
8118 enum rtx_code code ATTRIBUTE_UNUSED
;
8120 /* ??? In order to make all comparisons reversible, we do all comparisons
8121 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8122 all forms trapping and nontrapping comparisons, we can make inequality
8123 comparisons trapping again, since it results in better code when using
8124 FCOM based compares. */
8125 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8129 ix86_cc_mode (code
, op0
, op1
)
8133 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8134 return ix86_fp_compare_mode (code
);
8137 /* Only zero flag is needed. */
8139 case NE
: /* ZF!=0 */
8141 /* Codes needing carry flag. */
8142 case GEU
: /* CF=0 */
8143 case GTU
: /* CF=0 & ZF=0 */
8144 case LTU
: /* CF=1 */
8145 case LEU
: /* CF=1 | ZF=1 */
8147 /* Codes possibly doable only with sign flag when
8148 comparing against zero. */
8149 case GE
: /* SF=OF or SF=0 */
8150 case LT
: /* SF<>OF or SF=1 */
8151 if (op1
== const0_rtx
)
8154 /* For other cases Carry flag is not required. */
8156 /* Codes doable only with sign flag when comparing
8157 against zero, but we miss jump instruction for it
8158 so we need to use relational tests agains overflow
8159 that thus needs to be zero. */
8160 case GT
: /* ZF=0 & SF=OF */
8161 case LE
: /* ZF=1 | SF<>OF */
8162 if (op1
== const0_rtx
)
8166 /* strcmp pattern do (use flags) and combine may ask us for proper
8175 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8178 ix86_use_fcomi_compare (code
)
8179 enum rtx_code code ATTRIBUTE_UNUSED
;
8181 enum rtx_code swapped_code
= swap_condition (code
);
8182 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8183 || (ix86_fp_comparison_cost (swapped_code
)
8184 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8187 /* Swap, force into registers, or otherwise massage the two operands
8188 to a fp comparison. The operands are updated in place; the new
8189 comparsion code is returned. */
8191 static enum rtx_code
8192 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
8196 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8197 rtx op0
= *pop0
, op1
= *pop1
;
8198 enum machine_mode op_mode
= GET_MODE (op0
);
8199 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8201 /* All of the unordered compare instructions only work on registers.
8202 The same is true of the XFmode compare instructions. The same is
8203 true of the fcomi compare instructions. */
8206 && (fpcmp_mode
== CCFPUmode
8207 || op_mode
== XFmode
8208 || op_mode
== TFmode
8209 || ix86_use_fcomi_compare (code
)))
8211 op0
= force_reg (op_mode
, op0
);
8212 op1
= force_reg (op_mode
, op1
);
8216 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8217 things around if they appear profitable, otherwise force op0
8220 if (standard_80387_constant_p (op0
) == 0
8221 || (GET_CODE (op0
) == MEM
8222 && ! (standard_80387_constant_p (op1
) == 0
8223 || GET_CODE (op1
) == MEM
)))
8226 tmp
= op0
, op0
= op1
, op1
= tmp
;
8227 code
= swap_condition (code
);
8230 if (GET_CODE (op0
) != REG
)
8231 op0
= force_reg (op_mode
, op0
);
8233 if (CONSTANT_P (op1
))
8235 if (standard_80387_constant_p (op1
))
8236 op1
= force_reg (op_mode
, op1
);
8238 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8242 /* Try to rearrange the comparison to make it cheaper. */
8243 if (ix86_fp_comparison_cost (code
)
8244 > ix86_fp_comparison_cost (swap_condition (code
))
8245 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8248 tmp
= op0
, op0
= op1
, op1
= tmp
;
8249 code
= swap_condition (code
);
8250 if (GET_CODE (op0
) != REG
)
8251 op0
= force_reg (op_mode
, op0
);
8259 /* Convert comparison codes we use to represent FP comparison to integer
8260 code that will result in proper branch. Return UNKNOWN if no such code
8262 static enum rtx_code
8263 ix86_fp_compare_code_to_integer (code
)
8293 /* Split comparison code CODE into comparisons we can do using branch
8294 instructions. BYPASS_CODE is comparison code for branch that will
8295 branch around FIRST_CODE and SECOND_CODE. If some of branches
8296 is not required, set value to NIL.
8297 We never require more than two branches. */
8299 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
8300 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
8306 /* The fcomi comparison sets flags as follows:
8316 case GT
: /* GTU - CF=0 & ZF=0 */
8317 case GE
: /* GEU - CF=0 */
8318 case ORDERED
: /* PF=0 */
8319 case UNORDERED
: /* PF=1 */
8320 case UNEQ
: /* EQ - ZF=1 */
8321 case UNLT
: /* LTU - CF=1 */
8322 case UNLE
: /* LEU - CF=1 | ZF=1 */
8323 case LTGT
: /* EQ - ZF=0 */
8325 case LT
: /* LTU - CF=1 - fails on unordered */
8327 *bypass_code
= UNORDERED
;
8329 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8331 *bypass_code
= UNORDERED
;
8333 case EQ
: /* EQ - ZF=1 - fails on unordered */
8335 *bypass_code
= UNORDERED
;
8337 case NE
: /* NE - ZF=0 - fails on unordered */
8339 *second_code
= UNORDERED
;
8341 case UNGE
: /* GEU - CF=0 - fails on unordered */
8343 *second_code
= UNORDERED
;
8345 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8347 *second_code
= UNORDERED
;
8352 if (!TARGET_IEEE_FP
)
8359 /* Return cost of comparison done fcom + arithmetics operations on AX.
8360 All following functions do use number of instructions as an cost metrics.
8361 In future this should be tweaked to compute bytes for optimize_size and
8362 take into account performance of various instructions on various CPUs. */
8364 ix86_fp_comparison_arithmetics_cost (code
)
8367 if (!TARGET_IEEE_FP
)
8369 /* The cost of code output by ix86_expand_fp_compare. */
8397 /* Return cost of comparison done using fcomi operation.
8398 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8400 ix86_fp_comparison_fcomi_cost (code
)
8403 enum rtx_code bypass_code
, first_code
, second_code
;
8404 /* Return arbitarily high cost when instruction is not supported - this
8405 prevents gcc from using it. */
8408 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8409 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8412 /* Return cost of comparison done using sahf operation.
8413 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8415 ix86_fp_comparison_sahf_cost (code
)
8418 enum rtx_code bypass_code
, first_code
, second_code
;
8419 /* Return arbitarily high cost when instruction is not preferred - this
8420 avoids gcc from using it. */
8421 if (!TARGET_USE_SAHF
&& !optimize_size
)
8423 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8424 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8427 /* Compute cost of the comparison done using any method.
8428 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8430 ix86_fp_comparison_cost (code
)
8433 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8436 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8437 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8439 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8440 if (min
> sahf_cost
)
8442 if (min
> fcomi_cost
)
8447 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8450 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
8452 rtx op0
, op1
, scratch
;
8456 enum machine_mode fpcmp_mode
, intcmp_mode
;
8458 int cost
= ix86_fp_comparison_cost (code
);
8459 enum rtx_code bypass_code
, first_code
, second_code
;
8461 fpcmp_mode
= ix86_fp_compare_mode (code
);
8462 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8465 *second_test
= NULL_RTX
;
8467 *bypass_test
= NULL_RTX
;
8469 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8471 /* Do fcomi/sahf based test when profitable. */
8472 if ((bypass_code
== NIL
|| bypass_test
)
8473 && (second_code
== NIL
|| second_test
)
8474 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8478 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8479 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8485 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8486 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8488 scratch
= gen_reg_rtx (HImode
);
8489 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8490 emit_insn (gen_x86_sahf_1 (scratch
));
8493 /* The FP codes work out to act like unsigned. */
8494 intcmp_mode
= fpcmp_mode
;
8496 if (bypass_code
!= NIL
)
8497 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8498 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8500 if (second_code
!= NIL
)
8501 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8502 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8507 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8508 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8509 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8511 scratch
= gen_reg_rtx (HImode
);
8512 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8514 /* In the unordered case, we have to check C2 for NaN's, which
8515 doesn't happen to work out to anything nice combination-wise.
8516 So do some bit twiddling on the value we've got in AH to come
8517 up with an appropriate set of condition codes. */
8519 intcmp_mode
= CCNOmode
;
8524 if (code
== GT
|| !TARGET_IEEE_FP
)
8526 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8531 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8532 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8533 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8534 intcmp_mode
= CCmode
;
8540 if (code
== LT
&& TARGET_IEEE_FP
)
8542 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8543 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8544 intcmp_mode
= CCmode
;
8549 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8555 if (code
== GE
|| !TARGET_IEEE_FP
)
8557 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8562 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8563 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8570 if (code
== LE
&& TARGET_IEEE_FP
)
8572 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8573 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8574 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8575 intcmp_mode
= CCmode
;
8580 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8586 if (code
== EQ
&& TARGET_IEEE_FP
)
8588 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8589 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8590 intcmp_mode
= CCmode
;
8595 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8602 if (code
== NE
&& TARGET_IEEE_FP
)
8604 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8605 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8611 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8617 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8621 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8630 /* Return the test that should be put into the flags user, i.e.
8631 the bcc, scc, or cmov instruction. */
8632 return gen_rtx_fmt_ee (code
, VOIDmode
,
8633 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8638 ix86_expand_compare (code
, second_test
, bypass_test
)
8640 rtx
*second_test
, *bypass_test
;
8643 op0
= ix86_compare_op0
;
8644 op1
= ix86_compare_op1
;
8647 *second_test
= NULL_RTX
;
8649 *bypass_test
= NULL_RTX
;
8651 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8652 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8653 second_test
, bypass_test
);
8655 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8660 /* Return true if the CODE will result in nontrivial jump sequence. */
8662 ix86_fp_jump_nontrivial_p (code
)
8665 enum rtx_code bypass_code
, first_code
, second_code
;
8668 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8669 return bypass_code
!= NIL
|| second_code
!= NIL
;
8673 ix86_expand_branch (code
, label
)
8679 switch (GET_MODE (ix86_compare_op0
))
8685 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8686 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8687 gen_rtx_LABEL_REF (VOIDmode
, label
),
8689 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8699 enum rtx_code bypass_code
, first_code
, second_code
;
8701 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8704 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8706 /* Check whether we will use the natural sequence with one jump. If
8707 so, we can expand jump early. Otherwise delay expansion by
8708 creating compound insn to not confuse optimizers. */
8709 if (bypass_code
== NIL
&& second_code
== NIL
8712 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8713 gen_rtx_LABEL_REF (VOIDmode
, label
),
8718 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8719 ix86_compare_op0
, ix86_compare_op1
);
8720 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8721 gen_rtx_LABEL_REF (VOIDmode
, label
),
8723 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8725 use_fcomi
= ix86_use_fcomi_compare (code
);
8726 vec
= rtvec_alloc (3 + !use_fcomi
);
8727 RTVEC_ELT (vec
, 0) = tmp
;
8729 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8731 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8734 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8736 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8744 /* Expand DImode branch into multiple compare+branch. */
8746 rtx lo
[2], hi
[2], label2
;
8747 enum rtx_code code1
, code2
, code3
;
8749 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8751 tmp
= ix86_compare_op0
;
8752 ix86_compare_op0
= ix86_compare_op1
;
8753 ix86_compare_op1
= tmp
;
8754 code
= swap_condition (code
);
8756 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8757 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8759 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8760 avoid two branches. This costs one extra insn, so disable when
8761 optimizing for size. */
8763 if ((code
== EQ
|| code
== NE
)
8765 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8770 if (hi
[1] != const0_rtx
)
8771 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8772 NULL_RTX
, 0, OPTAB_WIDEN
);
8775 if (lo
[1] != const0_rtx
)
8776 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8777 NULL_RTX
, 0, OPTAB_WIDEN
);
8779 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8780 NULL_RTX
, 0, OPTAB_WIDEN
);
8782 ix86_compare_op0
= tmp
;
8783 ix86_compare_op1
= const0_rtx
;
8784 ix86_expand_branch (code
, label
);
8788 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8789 op1 is a constant and the low word is zero, then we can just
8790 examine the high word. */
8792 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8795 case LT
: case LTU
: case GE
: case GEU
:
8796 ix86_compare_op0
= hi
[0];
8797 ix86_compare_op1
= hi
[1];
8798 ix86_expand_branch (code
, label
);
8804 /* Otherwise, we need two or three jumps. */
8806 label2
= gen_label_rtx ();
8809 code2
= swap_condition (code
);
8810 code3
= unsigned_condition (code
);
8814 case LT
: case GT
: case LTU
: case GTU
:
8817 case LE
: code1
= LT
; code2
= GT
; break;
8818 case GE
: code1
= GT
; code2
= LT
; break;
8819 case LEU
: code1
= LTU
; code2
= GTU
; break;
8820 case GEU
: code1
= GTU
; code2
= LTU
; break;
8822 case EQ
: code1
= NIL
; code2
= NE
; break;
8823 case NE
: code2
= NIL
; break;
8831 * if (hi(a) < hi(b)) goto true;
8832 * if (hi(a) > hi(b)) goto false;
8833 * if (lo(a) < lo(b)) goto true;
8837 ix86_compare_op0
= hi
[0];
8838 ix86_compare_op1
= hi
[1];
8841 ix86_expand_branch (code1
, label
);
8843 ix86_expand_branch (code2
, label2
);
8845 ix86_compare_op0
= lo
[0];
8846 ix86_compare_op1
= lo
[1];
8847 ix86_expand_branch (code3
, label
);
8850 emit_label (label2
);
8859 /* Split branch based on floating point condition. */
8861 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
8863 rtx op1
, op2
, target1
, target2
, tmp
;
8866 rtx label
= NULL_RTX
;
8868 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8871 if (target2
!= pc_rtx
)
8874 code
= reverse_condition_maybe_unordered (code
);
8879 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8880 tmp
, &second
, &bypass
);
8882 if (split_branch_probability
>= 0)
8884 /* Distribute the probabilities across the jumps.
8885 Assume the BYPASS and SECOND to be always test
8887 probability
= split_branch_probability
;
8889 /* Value of 1 is low enough to make no need for probability
8890 to be updated. Later we may run some experiments and see
8891 if unordered values are more frequent in practice. */
8893 bypass_probability
= 1;
8895 second_probability
= 1;
8897 if (bypass
!= NULL_RTX
)
8899 label
= gen_label_rtx ();
8900 i
= emit_jump_insn (gen_rtx_SET
8902 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8904 gen_rtx_LABEL_REF (VOIDmode
,
8907 if (bypass_probability
>= 0)
8909 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8910 GEN_INT (bypass_probability
),
8913 i
= emit_jump_insn (gen_rtx_SET
8915 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8916 condition
, target1
, target2
)));
8917 if (probability
>= 0)
8919 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8920 GEN_INT (probability
),
8922 if (second
!= NULL_RTX
)
8924 i
= emit_jump_insn (gen_rtx_SET
8926 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8928 if (second_probability
>= 0)
8930 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8931 GEN_INT (second_probability
),
8934 if (label
!= NULL_RTX
)
8939 ix86_expand_setcc (code
, dest
)
8943 rtx ret
, tmp
, tmpreg
;
8944 rtx second_test
, bypass_test
;
8946 if (GET_MODE (ix86_compare_op0
) == DImode
8948 return 0; /* FAIL */
8950 if (GET_MODE (dest
) != QImode
)
8953 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8954 PUT_MODE (ret
, QImode
);
8959 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8960 if (bypass_test
|| second_test
)
8962 rtx test
= second_test
;
8964 rtx tmp2
= gen_reg_rtx (QImode
);
8971 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8973 PUT_MODE (test
, QImode
);
8974 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8977 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8979 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8982 return 1; /* DONE */
8986 ix86_expand_int_movcc (operands
)
8989 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8990 rtx compare_seq
, compare_op
;
8991 rtx second_test
, bypass_test
;
8992 enum machine_mode mode
= GET_MODE (operands
[0]);
8994 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8995 In case comparsion is done with immediate, we can convert it to LTU or
8996 GEU by altering the integer. */
8998 if ((code
== LEU
|| code
== GTU
)
8999 && GET_CODE (ix86_compare_op1
) == CONST_INT
9001 && INTVAL (ix86_compare_op1
) != -1
9002 /* For x86-64, the immediate field in the instruction is 32-bit
9003 signed, so we can't increment a DImode value above 0x7fffffff. */
9005 || GET_MODE (ix86_compare_op0
) != DImode
9006 || INTVAL (ix86_compare_op1
) != 0x7fffffff)
9007 && GET_CODE (operands
[2]) == CONST_INT
9008 && GET_CODE (operands
[3]) == CONST_INT
)
9014 ix86_compare_op1
= gen_int_mode (INTVAL (ix86_compare_op1
) + 1,
9015 GET_MODE (ix86_compare_op0
));
9019 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9020 compare_seq
= get_insns ();
9023 compare_code
= GET_CODE (compare_op
);
9025 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9026 HImode insns, we'd be swallowed in word prefix ops. */
9029 && (mode
!= DImode
|| TARGET_64BIT
)
9030 && GET_CODE (operands
[2]) == CONST_INT
9031 && GET_CODE (operands
[3]) == CONST_INT
)
9033 rtx out
= operands
[0];
9034 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9035 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9038 if ((compare_code
== LTU
|| compare_code
== GEU
)
9039 && !second_test
&& !bypass_test
)
9041 /* Detect overlap between destination and compare sources. */
9044 /* To simplify rest of code, restrict to the GEU case. */
9045 if (compare_code
== LTU
)
9050 compare_code
= reverse_condition (compare_code
);
9051 code
= reverse_condition (code
);
9055 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9056 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9057 tmp
= gen_reg_rtx (mode
);
9059 emit_insn (compare_seq
);
9061 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
9063 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
9075 tmp
= expand_simple_binop (mode
, PLUS
,
9077 tmp
, 1, OPTAB_DIRECT
);
9088 tmp
= expand_simple_binop (mode
, IOR
,
9090 tmp
, 1, OPTAB_DIRECT
);
9092 else if (diff
== -1 && ct
)
9102 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
9104 tmp
= expand_simple_binop (mode
, PLUS
,
9106 tmp
, 1, OPTAB_DIRECT
);
9114 * andl cf - ct, dest
9124 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
9127 tmp
= expand_simple_binop (mode
, AND
,
9129 gen_int_mode (cf
- ct
, mode
),
9130 tmp
, 1, OPTAB_DIRECT
);
9132 tmp
= expand_simple_binop (mode
, PLUS
,
9134 tmp
, 1, OPTAB_DIRECT
);
9138 emit_move_insn (out
, tmp
);
9140 return 1; /* DONE */
9147 tmp
= ct
, ct
= cf
, cf
= tmp
;
9149 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9151 /* We may be reversing unordered compare to normal compare, that
9152 is not valid in general (we may convert non-trapping condition
9153 to trapping one), however on i386 we currently emit all
9154 comparisons unordered. */
9155 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9156 code
= reverse_condition_maybe_unordered (code
);
9160 compare_code
= reverse_condition (compare_code
);
9161 code
= reverse_condition (code
);
9166 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9167 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
9169 if (ix86_compare_op1
== const0_rtx
9170 && (code
== LT
|| code
== GE
))
9171 compare_code
= code
;
9172 else if (ix86_compare_op1
== constm1_rtx
)
9176 else if (code
== GT
)
9181 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9182 if (compare_code
!= NIL
9183 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9184 && (cf
== -1 || ct
== -1))
9186 /* If lea code below could be used, only optimize
9187 if it results in a 2 insn sequence. */
9189 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9190 || diff
== 3 || diff
== 5 || diff
== 9)
9191 || (compare_code
== LT
&& ct
== -1)
9192 || (compare_code
== GE
&& cf
== -1))
9195 * notl op1 (if necessary)
9203 code
= reverse_condition (code
);
9206 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9207 ix86_compare_op1
, VOIDmode
, 0, -1);
9209 out
= expand_simple_binop (mode
, IOR
,
9211 out
, 1, OPTAB_DIRECT
);
9212 if (out
!= operands
[0])
9213 emit_move_insn (operands
[0], out
);
9215 return 1; /* DONE */
9219 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9220 || diff
== 3 || diff
== 5 || diff
== 9)
9221 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
), 0)))
9227 * lea cf(dest*(ct-cf)),dest
9231 * This also catches the degenerate setcc-only case.
9237 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9238 ix86_compare_op1
, VOIDmode
, 0, 1);
9241 /* On x86_64 the lea instruction operates on Pmode, so we need
9242 to get arithmetics done in proper mode to match. */
9249 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9253 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9259 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9263 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
9269 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
9270 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
9272 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
9273 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9277 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
9279 if (out
!= operands
[0])
9280 emit_move_insn (operands
[0], copy_rtx (out
));
9282 return 1; /* DONE */
9286 * General case: Jumpful:
9287 * xorl dest,dest cmpl op1, op2
9288 * cmpl op1, op2 movl ct, dest
9290 * decl dest movl cf, dest
9291 * andl (cf-ct),dest 1:
9296 * This is reasonably steep, but branch mispredict costs are
9297 * high on modern cpus, so consider failing only if optimizing
9300 * %%% Parameterize branch_cost on the tuning architecture, then
9301 * use that. The 80386 couldn't care less about mispredicts.
9304 if (!optimize_size
&& !TARGET_CMOVE
)
9310 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9311 /* We may be reversing unordered compare to normal compare,
9312 that is not valid in general (we may convert non-trapping
9313 condition to trapping one), however on i386 we currently
9314 emit all comparisons unordered. */
9315 code
= reverse_condition_maybe_unordered (code
);
9318 code
= reverse_condition (code
);
9319 if (compare_code
!= NIL
)
9320 compare_code
= reverse_condition (compare_code
);
9324 if (compare_code
!= NIL
)
9326 /* notl op1 (if needed)
9331 For x < 0 (resp. x <= -1) there will be no notl,
9332 so if possible swap the constants to get rid of the
9334 True/false will be -1/0 while code below (store flag
9335 followed by decrement) is 0/-1, so the constants need
9336 to be exchanged once more. */
9338 if (compare_code
== GE
|| !cf
)
9340 code
= reverse_condition (code
);
9345 HOST_WIDE_INT tmp
= cf
;
9350 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9351 ix86_compare_op1
, VOIDmode
, 0, -1);
9355 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9356 ix86_compare_op1
, VOIDmode
, 0, 1);
9358 out
= expand_simple_binop (mode
, PLUS
, out
, constm1_rtx
,
9359 out
, 1, OPTAB_DIRECT
);
9362 out
= expand_simple_binop (mode
, AND
, out
,
9363 gen_int_mode (cf
- ct
, mode
),
9364 out
, 1, OPTAB_DIRECT
);
9366 out
= expand_simple_binop (mode
, PLUS
, out
, GEN_INT (ct
),
9367 out
, 1, OPTAB_DIRECT
);
9368 if (out
!= operands
[0])
9369 emit_move_insn (operands
[0], out
);
9371 return 1; /* DONE */
9377 /* Try a few things more with specific constants and a variable. */
9380 rtx var
, orig_out
, out
, tmp
;
9383 return 0; /* FAIL */
9385 /* If one of the two operands is an interesting constant, load a
9386 constant with the above and mask it in with a logical operation. */
9388 if (GET_CODE (operands
[2]) == CONST_INT
)
9391 if (INTVAL (operands
[2]) == 0)
9392 operands
[3] = constm1_rtx
, op
= and_optab
;
9393 else if (INTVAL (operands
[2]) == -1)
9394 operands
[3] = const0_rtx
, op
= ior_optab
;
9396 return 0; /* FAIL */
9398 else if (GET_CODE (operands
[3]) == CONST_INT
)
9401 if (INTVAL (operands
[3]) == 0)
9402 operands
[2] = constm1_rtx
, op
= and_optab
;
9403 else if (INTVAL (operands
[3]) == -1)
9404 operands
[2] = const0_rtx
, op
= ior_optab
;
9406 return 0; /* FAIL */
9409 return 0; /* FAIL */
9411 orig_out
= operands
[0];
9412 tmp
= gen_reg_rtx (mode
);
9415 /* Recurse to get the constant loaded. */
9416 if (ix86_expand_int_movcc (operands
) == 0)
9417 return 0; /* FAIL */
9419 /* Mask in the interesting variable. */
9420 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9422 if (out
!= orig_out
)
9423 emit_move_insn (orig_out
, out
);
9425 return 1; /* DONE */
9429 * For comparison with above,
9439 if (! nonimmediate_operand (operands
[2], mode
))
9440 operands
[2] = force_reg (mode
, operands
[2]);
9441 if (! nonimmediate_operand (operands
[3], mode
))
9442 operands
[3] = force_reg (mode
, operands
[3]);
9444 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9446 rtx tmp
= gen_reg_rtx (mode
);
9447 emit_move_insn (tmp
, operands
[3]);
9450 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9452 rtx tmp
= gen_reg_rtx (mode
);
9453 emit_move_insn (tmp
, operands
[2]);
9456 if (! register_operand (operands
[2], VOIDmode
)
9457 && ! register_operand (operands
[3], VOIDmode
))
9458 operands
[2] = force_reg (mode
, operands
[2]);
9460 emit_insn (compare_seq
);
9461 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9462 gen_rtx_IF_THEN_ELSE (mode
,
9463 compare_op
, operands
[2],
9466 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9467 gen_rtx_IF_THEN_ELSE (mode
,
9472 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9473 gen_rtx_IF_THEN_ELSE (mode
,
9478 return 1; /* DONE */
9482 ix86_expand_fp_movcc (operands
)
9487 rtx compare_op
, second_test
, bypass_test
;
9489 /* For SF/DFmode conditional moves based on comparisons
9490 in same mode, we may want to use SSE min/max instructions. */
9491 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9492 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9493 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9494 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9496 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9497 /* We may be called from the post-reload splitter. */
9498 && (!REG_P (operands
[0])
9499 || SSE_REG_P (operands
[0])
9500 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9502 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9503 code
= GET_CODE (operands
[1]);
9505 /* See if we have (cross) match between comparison operands and
9506 conditional move operands. */
9507 if (rtx_equal_p (operands
[2], op1
))
9512 code
= reverse_condition_maybe_unordered (code
);
9514 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9516 /* Check for min operation. */
9519 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9520 if (memory_operand (op0
, VOIDmode
))
9521 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9522 if (GET_MODE (operands
[0]) == SFmode
)
9523 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9525 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9528 /* Check for max operation. */
9531 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9532 if (memory_operand (op0
, VOIDmode
))
9533 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9534 if (GET_MODE (operands
[0]) == SFmode
)
9535 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9537 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9541 /* Manage condition to be sse_comparison_operator. In case we are
9542 in non-ieee mode, try to canonicalize the destination operand
9543 to be first in the comparison - this helps reload to avoid extra
9545 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9546 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9548 rtx tmp
= ix86_compare_op0
;
9549 ix86_compare_op0
= ix86_compare_op1
;
9550 ix86_compare_op1
= tmp
;
9551 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9552 VOIDmode
, ix86_compare_op0
,
9555 /* Similary try to manage result to be first operand of conditional
9556 move. We also don't support the NE comparison on SSE, so try to
9558 if ((rtx_equal_p (operands
[0], operands
[3])
9559 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9560 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9562 rtx tmp
= operands
[2];
9563 operands
[2] = operands
[3];
9565 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9566 (GET_CODE (operands
[1])),
9567 VOIDmode
, ix86_compare_op0
,
9570 if (GET_MODE (operands
[0]) == SFmode
)
9571 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9572 operands
[2], operands
[3],
9573 ix86_compare_op0
, ix86_compare_op1
));
9575 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9576 operands
[2], operands
[3],
9577 ix86_compare_op0
, ix86_compare_op1
));
9581 /* The floating point conditional move instructions don't directly
9582 support conditions resulting from a signed integer comparison. */
9584 code
= GET_CODE (operands
[1]);
9585 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9587 /* The floating point conditional move instructions don't directly
9588 support signed integer comparisons. */
9590 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9592 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9594 tmp
= gen_reg_rtx (QImode
);
9595 ix86_expand_setcc (code
, tmp
);
9597 ix86_compare_op0
= tmp
;
9598 ix86_compare_op1
= const0_rtx
;
9599 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9601 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9603 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9604 emit_move_insn (tmp
, operands
[3]);
9607 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9609 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9610 emit_move_insn (tmp
, operands
[2]);
9614 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9615 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9620 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9621 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9626 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9627 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9635 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9636 works for floating pointer parameters and nonoffsetable memories.
9637 For pushes, it returns just stack offsets; the values will be saved
9638 in the right order. Maximally three parts are generated. */
9641 ix86_split_to_parts (operand
, parts
, mode
)
9644 enum machine_mode mode
;
9649 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
9651 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9653 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9655 if (size
< 2 || size
> 3)
9658 /* Optimize constant pool reference to immediates. This is used by fp
9659 moves, that force all constants to memory to allow combining. */
9660 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
9662 rtx tmp
= maybe_get_pool_constant (operand
);
9667 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9669 /* The only non-offsetable memories we handle are pushes. */
9670 if (! push_operand (operand
, VOIDmode
))
9673 operand
= copy_rtx (operand
);
9674 PUT_MODE (operand
, Pmode
);
9675 parts
[0] = parts
[1] = parts
[2] = operand
;
9677 else if (!TARGET_64BIT
)
9680 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9683 if (REG_P (operand
))
9685 if (!reload_completed
)
9687 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9688 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9690 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9692 else if (offsettable_memref_p (operand
))
9694 operand
= adjust_address (operand
, SImode
, 0);
9696 parts
[1] = adjust_address (operand
, SImode
, 4);
9698 parts
[2] = adjust_address (operand
, SImode
, 8);
9700 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9705 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9710 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9711 parts
[2] = gen_int_mode (l
[2], SImode
);
9714 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9719 parts
[1] = gen_int_mode (l
[1], SImode
);
9720 parts
[0] = gen_int_mode (l
[0], SImode
);
9729 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9730 if (mode
== XFmode
|| mode
== TFmode
)
9732 if (REG_P (operand
))
9734 if (!reload_completed
)
9736 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9737 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9739 else if (offsettable_memref_p (operand
))
9741 operand
= adjust_address (operand
, DImode
, 0);
9743 parts
[1] = adjust_address (operand
, SImode
, 8);
9745 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9750 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9751 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9752 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9753 if (HOST_BITS_PER_WIDE_INT
>= 64)
9756 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9757 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
9760 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
9761 parts
[1] = gen_int_mode (l
[2], SImode
);
9771 /* Emit insns to perform a move or push of DI, DF, and XF values.
9772 Return false when normal moves are needed; true when all required
9773 insns have been emitted. Operands 2-4 contain the input values
9774 int the correct order; operands 5-7 contain the output values. */
9777 ix86_split_long_move (operands
)
9784 enum machine_mode mode
= GET_MODE (operands
[0]);
9786 /* The DFmode expanders may ask us to move double.
9787 For 64bit target this is single move. By hiding the fact
9788 here we simplify i386.md splitters. */
9789 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
9791 /* Optimize constant pool reference to immediates. This is used by
9792 fp moves, that force all constants to memory to allow combining. */
9794 if (GET_CODE (operands
[1]) == MEM
9795 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
9796 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
9797 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
9798 if (push_operand (operands
[0], VOIDmode
))
9800 operands
[0] = copy_rtx (operands
[0]);
9801 PUT_MODE (operands
[0], Pmode
);
9804 operands
[0] = gen_lowpart (DImode
, operands
[0]);
9805 operands
[1] = gen_lowpart (DImode
, operands
[1]);
9806 emit_move_insn (operands
[0], operands
[1]);
9810 /* The only non-offsettable memory we handle is push. */
9811 if (push_operand (operands
[0], VOIDmode
))
9813 else if (GET_CODE (operands
[0]) == MEM
9814 && ! offsettable_memref_p (operands
[0]))
9817 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
9818 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
9820 /* When emitting push, take care for source operands on the stack. */
9821 if (push
&& GET_CODE (operands
[1]) == MEM
9822 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
9825 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
9826 XEXP (part
[1][2], 0));
9827 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
9828 XEXP (part
[1][1], 0));
9831 /* We need to do copy in the right order in case an address register
9832 of the source overlaps the destination. */
9833 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
9835 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
9837 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9840 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
9843 /* Collision in the middle part can be handled by reordering. */
9844 if (collisions
== 1 && nparts
== 3
9845 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9848 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
9849 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
9852 /* If there are more collisions, we can't handle it by reordering.
9853 Do an lea to the last part and use only one colliding move. */
9854 else if (collisions
> 1)
9857 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
9858 XEXP (part
[1][0], 0)));
9859 part
[1][0] = change_address (part
[1][0],
9860 TARGET_64BIT
? DImode
: SImode
,
9861 part
[0][nparts
- 1]);
9862 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
9864 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
9874 /* We use only first 12 bytes of TFmode value, but for pushing we
9875 are required to adjust stack as if we were pushing real 16byte
9877 if (mode
== TFmode
&& !TARGET_64BIT
)
9878 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
9880 emit_move_insn (part
[0][2], part
[1][2]);
9885 /* In 64bit mode we don't have 32bit push available. In case this is
9886 register, it is OK - we will just use larger counterpart. We also
9887 retype memory - these comes from attempt to avoid REX prefix on
9888 moving of second half of TFmode value. */
9889 if (GET_MODE (part
[1][1]) == SImode
)
9891 if (GET_CODE (part
[1][1]) == MEM
)
9892 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
9893 else if (REG_P (part
[1][1]))
9894 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
9897 if (GET_MODE (part
[1][0]) == SImode
)
9898 part
[1][0] = part
[1][1];
9901 emit_move_insn (part
[0][1], part
[1][1]);
9902 emit_move_insn (part
[0][0], part
[1][0]);
9906 /* Choose correct order to not overwrite the source before it is copied. */
9907 if ((REG_P (part
[0][0])
9908 && REG_P (part
[1][1])
9909 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9911 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9913 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9917 operands
[2] = part
[0][2];
9918 operands
[3] = part
[0][1];
9919 operands
[4] = part
[0][0];
9920 operands
[5] = part
[1][2];
9921 operands
[6] = part
[1][1];
9922 operands
[7] = part
[1][0];
9926 operands
[2] = part
[0][1];
9927 operands
[3] = part
[0][0];
9928 operands
[5] = part
[1][1];
9929 operands
[6] = part
[1][0];
9936 operands
[2] = part
[0][0];
9937 operands
[3] = part
[0][1];
9938 operands
[4] = part
[0][2];
9939 operands
[5] = part
[1][0];
9940 operands
[6] = part
[1][1];
9941 operands
[7] = part
[1][2];
9945 operands
[2] = part
[0][0];
9946 operands
[3] = part
[0][1];
9947 operands
[5] = part
[1][0];
9948 operands
[6] = part
[1][1];
9951 emit_move_insn (operands
[2], operands
[5]);
9952 emit_move_insn (operands
[3], operands
[6]);
9954 emit_move_insn (operands
[4], operands
[7]);
9960 ix86_split_ashldi (operands
, scratch
)
9961 rtx
*operands
, scratch
;
9963 rtx low
[2], high
[2];
9966 if (GET_CODE (operands
[2]) == CONST_INT
)
9968 split_di (operands
, 2, low
, high
);
9969 count
= INTVAL (operands
[2]) & 63;
9973 emit_move_insn (high
[0], low
[1]);
9974 emit_move_insn (low
[0], const0_rtx
);
9977 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
9981 if (!rtx_equal_p (operands
[0], operands
[1]))
9982 emit_move_insn (operands
[0], operands
[1]);
9983 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
9984 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
9989 if (!rtx_equal_p (operands
[0], operands
[1]))
9990 emit_move_insn (operands
[0], operands
[1]);
9992 split_di (operands
, 1, low
, high
);
9994 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
9995 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
9997 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9999 if (! no_new_pseudos
)
10000 scratch
= force_reg (SImode
, const0_rtx
);
10002 emit_move_insn (scratch
, const0_rtx
);
10004 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
10008 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10013 ix86_split_ashrdi (operands
, scratch
)
10014 rtx
*operands
, scratch
;
10016 rtx low
[2], high
[2];
10019 if (GET_CODE (operands
[2]) == CONST_INT
)
10021 split_di (operands
, 2, low
, high
);
10022 count
= INTVAL (operands
[2]) & 63;
10026 emit_move_insn (low
[0], high
[1]);
10028 if (! reload_completed
)
10029 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10032 emit_move_insn (high
[0], low
[0]);
10033 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10037 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10041 if (!rtx_equal_p (operands
[0], operands
[1]))
10042 emit_move_insn (operands
[0], operands
[1]);
10043 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10044 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10049 if (!rtx_equal_p (operands
[0], operands
[1]))
10050 emit_move_insn (operands
[0], operands
[1]);
10052 split_di (operands
, 1, low
, high
);
10054 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10055 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10057 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10059 if (! no_new_pseudos
)
10060 scratch
= gen_reg_rtx (SImode
);
10061 emit_move_insn (scratch
, high
[0]);
10062 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10063 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10067 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10072 ix86_split_lshrdi (operands
, scratch
)
10073 rtx
*operands
, scratch
;
10075 rtx low
[2], high
[2];
10078 if (GET_CODE (operands
[2]) == CONST_INT
)
10080 split_di (operands
, 2, low
, high
);
10081 count
= INTVAL (operands
[2]) & 63;
10085 emit_move_insn (low
[0], high
[1]);
10086 emit_move_insn (high
[0], const0_rtx
);
10089 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10093 if (!rtx_equal_p (operands
[0], operands
[1]))
10094 emit_move_insn (operands
[0], operands
[1]);
10095 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10096 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10101 if (!rtx_equal_p (operands
[0], operands
[1]))
10102 emit_move_insn (operands
[0], operands
[1]);
10104 split_di (operands
, 1, low
, high
);
10106 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10107 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10109 /* Heh. By reversing the arguments, we can reuse this pattern. */
10110 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10112 if (! no_new_pseudos
)
10113 scratch
= force_reg (SImode
, const0_rtx
);
10115 emit_move_insn (scratch
, const0_rtx
);
10117 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10121 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10125 /* Helper function for the string operations below. Dest VARIABLE whether
10126 it is aligned to VALUE bytes. If true, jump to the label. */
10128 ix86_expand_aligntest (variable
, value
)
10132 rtx label
= gen_label_rtx ();
10133 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10134 if (GET_MODE (variable
) == DImode
)
10135 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10137 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10138 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10143 /* Adjust COUNTER by the VALUE. */
10145 ix86_adjust_counter (countreg
, value
)
10147 HOST_WIDE_INT value
;
10149 if (GET_MODE (countreg
) == DImode
)
10150 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10152 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10155 /* Zero extend possibly SImode EXP to Pmode register. */
10157 ix86_zero_extend_to_Pmode (exp
)
10161 if (GET_MODE (exp
) == VOIDmode
)
10162 return force_reg (Pmode
, exp
);
10163 if (GET_MODE (exp
) == Pmode
)
10164 return copy_to_mode_reg (Pmode
, exp
);
10165 r
= gen_reg_rtx (Pmode
);
10166 emit_insn (gen_zero_extendsidi2 (r
, exp
));
10170 /* Expand string move (memcpy) operation. Use i386 string operations when
10171 profitable. expand_clrstr contains similar code. */
10173 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
10174 rtx dst
, src
, count_exp
, align_exp
;
10176 rtx srcreg
, destreg
, countreg
;
10177 enum machine_mode counter_mode
;
10178 HOST_WIDE_INT align
= 0;
10179 unsigned HOST_WIDE_INT count
= 0;
10184 if (GET_CODE (align_exp
) == CONST_INT
)
10185 align
= INTVAL (align_exp
);
10187 /* This simple hack avoids all inlining code and simplifies code below. */
10188 if (!TARGET_ALIGN_STRINGOPS
)
10191 if (GET_CODE (count_exp
) == CONST_INT
)
10192 count
= INTVAL (count_exp
);
10194 /* Figure out proper mode for counter. For 32bits it is always SImode,
10195 for 64bits use SImode when possible, otherwise DImode.
10196 Set count to number of bytes copied when known at compile time. */
10197 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10198 || x86_64_zero_extended_value (count_exp
))
10199 counter_mode
= SImode
;
10201 counter_mode
= DImode
;
10203 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10206 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10207 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10209 emit_insn (gen_cld ());
10211 /* When optimizing for size emit simple rep ; movsb instruction for
10212 counts not divisible by 4. */
10214 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10216 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10218 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
10219 destreg
, srcreg
, countreg
));
10221 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
10222 destreg
, srcreg
, countreg
));
10225 /* For constant aligned (or small unaligned) copies use rep movsl
10226 followed by code copying the rest. For PentiumPro ensure 8 byte
10227 alignment to allow rep movsl acceleration. */
10229 else if (count
!= 0
10231 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10232 || optimize_size
|| count
< (unsigned int) 64))
10234 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10235 if (count
& ~(size
- 1))
10237 countreg
= copy_to_mode_reg (counter_mode
,
10238 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10239 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10240 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10244 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
10245 destreg
, srcreg
, countreg
));
10247 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
10248 destreg
, srcreg
, countreg
));
10251 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
10252 destreg
, srcreg
, countreg
));
10254 if (size
== 8 && (count
& 0x04))
10255 emit_insn (gen_strmovsi (destreg
, srcreg
));
10257 emit_insn (gen_strmovhi (destreg
, srcreg
));
10259 emit_insn (gen_strmovqi (destreg
, srcreg
));
10261 /* The generic code based on the glibc implementation:
10262 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10263 allowing accelerated copying there)
10264 - copy the data using rep movsl
10265 - copy the rest. */
10270 int desired_alignment
= (TARGET_PENTIUMPRO
10271 && (count
== 0 || count
>= (unsigned int) 260)
10272 ? 8 : UNITS_PER_WORD
);
10274 /* In case we don't know anything about the alignment, default to
10275 library version, since it is usually equally fast and result in
10277 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10283 if (TARGET_SINGLE_STRINGOP
)
10284 emit_insn (gen_cld ());
10286 countreg2
= gen_reg_rtx (Pmode
);
10287 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10289 /* We don't use loops to align destination and to copy parts smaller
10290 than 4 bytes, because gcc is able to optimize such code better (in
10291 the case the destination or the count really is aligned, gcc is often
10292 able to predict the branches) and also it is friendlier to the
10293 hardware branch prediction.
10295 Using loops is benefical for generic case, because we can
10296 handle small counts using the loops. Many CPUs (such as Athlon)
10297 have large REP prefix setup costs.
10299 This is quite costy. Maybe we can revisit this decision later or
10300 add some customizability to this code. */
10302 if (count
== 0 && align
< desired_alignment
)
10304 label
= gen_label_rtx ();
10305 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10306 LEU
, 0, counter_mode
, 1, label
);
10310 rtx label
= ix86_expand_aligntest (destreg
, 1);
10311 emit_insn (gen_strmovqi (destreg
, srcreg
));
10312 ix86_adjust_counter (countreg
, 1);
10313 emit_label (label
);
10314 LABEL_NUSES (label
) = 1;
10318 rtx label
= ix86_expand_aligntest (destreg
, 2);
10319 emit_insn (gen_strmovhi (destreg
, srcreg
));
10320 ix86_adjust_counter (countreg
, 2);
10321 emit_label (label
);
10322 LABEL_NUSES (label
) = 1;
10324 if (align
<= 4 && desired_alignment
> 4)
10326 rtx label
= ix86_expand_aligntest (destreg
, 4);
10327 emit_insn (gen_strmovsi (destreg
, srcreg
));
10328 ix86_adjust_counter (countreg
, 4);
10329 emit_label (label
);
10330 LABEL_NUSES (label
) = 1;
10333 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10335 emit_label (label
);
10336 LABEL_NUSES (label
) = 1;
10339 if (!TARGET_SINGLE_STRINGOP
)
10340 emit_insn (gen_cld ());
10343 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10345 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
10346 destreg
, srcreg
, countreg2
));
10350 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10351 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
10352 destreg
, srcreg
, countreg2
));
10357 emit_label (label
);
10358 LABEL_NUSES (label
) = 1;
10360 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10361 emit_insn (gen_strmovsi (destreg
, srcreg
));
10362 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10364 rtx label
= ix86_expand_aligntest (countreg
, 4);
10365 emit_insn (gen_strmovsi (destreg
, srcreg
));
10366 emit_label (label
);
10367 LABEL_NUSES (label
) = 1;
10369 if (align
> 2 && count
!= 0 && (count
& 2))
10370 emit_insn (gen_strmovhi (destreg
, srcreg
));
10371 if (align
<= 2 || count
== 0)
10373 rtx label
= ix86_expand_aligntest (countreg
, 2);
10374 emit_insn (gen_strmovhi (destreg
, srcreg
));
10375 emit_label (label
);
10376 LABEL_NUSES (label
) = 1;
10378 if (align
> 1 && count
!= 0 && (count
& 1))
10379 emit_insn (gen_strmovqi (destreg
, srcreg
));
10380 if (align
<= 1 || count
== 0)
10382 rtx label
= ix86_expand_aligntest (countreg
, 1);
10383 emit_insn (gen_strmovqi (destreg
, srcreg
));
10384 emit_label (label
);
10385 LABEL_NUSES (label
) = 1;
10389 insns
= get_insns ();
10392 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
10397 /* Expand string clear operation (bzero). Use i386 string operations when
10398 profitable. expand_movstr contains similar code. */
10400 ix86_expand_clrstr (src
, count_exp
, align_exp
)
10401 rtx src
, count_exp
, align_exp
;
10403 rtx destreg
, zeroreg
, countreg
;
10404 enum machine_mode counter_mode
;
10405 HOST_WIDE_INT align
= 0;
10406 unsigned HOST_WIDE_INT count
= 0;
10408 if (GET_CODE (align_exp
) == CONST_INT
)
10409 align
= INTVAL (align_exp
);
10411 /* This simple hack avoids all inlining code and simplifies code below. */
10412 if (!TARGET_ALIGN_STRINGOPS
)
10415 if (GET_CODE (count_exp
) == CONST_INT
)
10416 count
= INTVAL (count_exp
);
10417 /* Figure out proper mode for counter. For 32bits it is always SImode,
10418 for 64bits use SImode when possible, otherwise DImode.
10419 Set count to number of bytes copied when known at compile time. */
10420 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10421 || x86_64_zero_extended_value (count_exp
))
10422 counter_mode
= SImode
;
10424 counter_mode
= DImode
;
10426 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10428 emit_insn (gen_cld ());
10430 /* When optimizing for size emit simple rep ; movsb instruction for
10431 counts not divisible by 4. */
10433 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10435 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10436 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10438 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
10439 destreg
, countreg
));
10441 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
10442 destreg
, countreg
));
10444 else if (count
!= 0
10446 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10447 || optimize_size
|| count
< (unsigned int) 64))
10449 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10450 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10451 if (count
& ~(size
- 1))
10453 countreg
= copy_to_mode_reg (counter_mode
,
10454 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10455 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10456 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10460 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
10461 destreg
, countreg
));
10463 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
10464 destreg
, countreg
));
10467 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
10468 destreg
, countreg
));
10470 if (size
== 8 && (count
& 0x04))
10471 emit_insn (gen_strsetsi (destreg
,
10472 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10474 emit_insn (gen_strsethi (destreg
,
10475 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10477 emit_insn (gen_strsetqi (destreg
,
10478 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10484 /* Compute desired alignment of the string operation. */
10485 int desired_alignment
= (TARGET_PENTIUMPRO
10486 && (count
== 0 || count
>= (unsigned int) 260)
10487 ? 8 : UNITS_PER_WORD
);
10489 /* In case we don't know anything about the alignment, default to
10490 library version, since it is usually equally fast and result in
10492 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10495 if (TARGET_SINGLE_STRINGOP
)
10496 emit_insn (gen_cld ());
10498 countreg2
= gen_reg_rtx (Pmode
);
10499 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10500 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10502 if (count
== 0 && align
< desired_alignment
)
10504 label
= gen_label_rtx ();
10505 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10506 LEU
, 0, counter_mode
, 1, label
);
10510 rtx label
= ix86_expand_aligntest (destreg
, 1);
10511 emit_insn (gen_strsetqi (destreg
,
10512 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10513 ix86_adjust_counter (countreg
, 1);
10514 emit_label (label
);
10515 LABEL_NUSES (label
) = 1;
10519 rtx label
= ix86_expand_aligntest (destreg
, 2);
10520 emit_insn (gen_strsethi (destreg
,
10521 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10522 ix86_adjust_counter (countreg
, 2);
10523 emit_label (label
);
10524 LABEL_NUSES (label
) = 1;
10526 if (align
<= 4 && desired_alignment
> 4)
10528 rtx label
= ix86_expand_aligntest (destreg
, 4);
10529 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
10530 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10532 ix86_adjust_counter (countreg
, 4);
10533 emit_label (label
);
10534 LABEL_NUSES (label
) = 1;
10537 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10539 emit_label (label
);
10540 LABEL_NUSES (label
) = 1;
10544 if (!TARGET_SINGLE_STRINGOP
)
10545 emit_insn (gen_cld ());
10548 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10550 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
10551 destreg
, countreg2
));
10555 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10556 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
10557 destreg
, countreg2
));
10561 emit_label (label
);
10562 LABEL_NUSES (label
) = 1;
10565 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10566 emit_insn (gen_strsetsi (destreg
,
10567 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10568 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10570 rtx label
= ix86_expand_aligntest (countreg
, 4);
10571 emit_insn (gen_strsetsi (destreg
,
10572 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10573 emit_label (label
);
10574 LABEL_NUSES (label
) = 1;
10576 if (align
> 2 && count
!= 0 && (count
& 2))
10577 emit_insn (gen_strsethi (destreg
,
10578 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10579 if (align
<= 2 || count
== 0)
10581 rtx label
= ix86_expand_aligntest (countreg
, 2);
10582 emit_insn (gen_strsethi (destreg
,
10583 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10584 emit_label (label
);
10585 LABEL_NUSES (label
) = 1;
10587 if (align
> 1 && count
!= 0 && (count
& 1))
10588 emit_insn (gen_strsetqi (destreg
,
10589 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10590 if (align
<= 1 || count
== 0)
10592 rtx label
= ix86_expand_aligntest (countreg
, 1);
10593 emit_insn (gen_strsetqi (destreg
,
10594 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10595 emit_label (label
);
10596 LABEL_NUSES (label
) = 1;
10601 /* Expand strlen. */
10603 ix86_expand_strlen (out
, src
, eoschar
, align
)
10604 rtx out
, src
, eoschar
, align
;
10606 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10608 /* The generic case of strlen expander is long. Avoid it's
10609 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10611 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10612 && !TARGET_INLINE_ALL_STRINGOPS
10614 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
10617 addr
= force_reg (Pmode
, XEXP (src
, 0));
10618 scratch1
= gen_reg_rtx (Pmode
);
10620 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10623 /* Well it seems that some optimizer does not combine a call like
10624 foo(strlen(bar), strlen(bar));
10625 when the move and the subtraction is done here. It does calculate
10626 the length just once when these instructions are done inside of
10627 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10628 often used and I use one fewer register for the lifetime of
10629 output_strlen_unroll() this is better. */
10631 emit_move_insn (out
, addr
);
10633 ix86_expand_strlensi_unroll_1 (out
, align
);
10635 /* strlensi_unroll_1 returns the address of the zero at the end of
10636 the string, like memchr(), so compute the length by subtracting
10637 the start address. */
10639 emit_insn (gen_subdi3 (out
, out
, addr
));
10641 emit_insn (gen_subsi3 (out
, out
, addr
));
10645 scratch2
= gen_reg_rtx (Pmode
);
10646 scratch3
= gen_reg_rtx (Pmode
);
10647 scratch4
= force_reg (Pmode
, constm1_rtx
);
10649 emit_move_insn (scratch3
, addr
);
10650 eoschar
= force_reg (QImode
, eoschar
);
10652 emit_insn (gen_cld ());
10655 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
10656 align
, scratch4
, scratch3
));
10657 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10658 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10662 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
10663 align
, scratch4
, scratch3
));
10664 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10665 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10671 /* Expand the appropriate insns for doing strlen if not just doing
10674 out = result, initialized with the start address
10675 align_rtx = alignment of the address.
10676 scratch = scratch register, initialized with the startaddress when
10677 not aligned, otherwise undefined
10679 This is just the body. It needs the initialisations mentioned above and
10680 some address computing at the end. These things are done in i386.md. */
10683 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
10684 rtx out
, align_rtx
;
10688 rtx align_2_label
= NULL_RTX
;
10689 rtx align_3_label
= NULL_RTX
;
10690 rtx align_4_label
= gen_label_rtx ();
10691 rtx end_0_label
= gen_label_rtx ();
10693 rtx tmpreg
= gen_reg_rtx (SImode
);
10694 rtx scratch
= gen_reg_rtx (SImode
);
10697 if (GET_CODE (align_rtx
) == CONST_INT
)
10698 align
= INTVAL (align_rtx
);
10700 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10702 /* Is there a known alignment and is it less than 4? */
10705 rtx scratch1
= gen_reg_rtx (Pmode
);
10706 emit_move_insn (scratch1
, out
);
10707 /* Is there a known alignment and is it not 2? */
10710 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10711 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10713 /* Leave just the 3 lower bits. */
10714 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10715 NULL_RTX
, 0, OPTAB_WIDEN
);
10717 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10718 Pmode
, 1, align_4_label
);
10719 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
10720 Pmode
, 1, align_2_label
);
10721 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
10722 Pmode
, 1, align_3_label
);
10726 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10727 check if is aligned to 4 - byte. */
10729 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
10730 NULL_RTX
, 0, OPTAB_WIDEN
);
10732 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10733 Pmode
, 1, align_4_label
);
10736 mem
= gen_rtx_MEM (QImode
, out
);
10738 /* Now compare the bytes. */
10740 /* Compare the first n unaligned byte on a byte per byte basis. */
10741 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
10742 QImode
, 1, end_0_label
);
10744 /* Increment the address. */
10746 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10748 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10750 /* Not needed with an alignment of 2 */
10753 emit_label (align_2_label
);
10755 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10759 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10761 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10763 emit_label (align_3_label
);
10766 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10770 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10772 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10775 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10776 align this loop. It gives only huge programs, but does not help to
10778 emit_label (align_4_label
);
10780 mem
= gen_rtx_MEM (SImode
, out
);
10781 emit_move_insn (scratch
, mem
);
10783 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
10785 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
10787 /* This formula yields a nonzero result iff one of the bytes is zero.
10788 This saves three branches inside loop and many cycles. */
10790 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
10791 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
10792 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
10793 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
10794 gen_int_mode (0x80808080, SImode
)));
10795 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
10800 rtx reg
= gen_reg_rtx (SImode
);
10801 rtx reg2
= gen_reg_rtx (Pmode
);
10802 emit_move_insn (reg
, tmpreg
);
10803 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
10805 /* If zero is not in the first two bytes, move two bytes forward. */
10806 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10807 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10808 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10809 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
10810 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
10813 /* Emit lea manually to avoid clobbering of flags. */
10814 emit_insn (gen_rtx_SET (SImode
, reg2
,
10815 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
10817 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10818 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10819 emit_insn (gen_rtx_SET (VOIDmode
, out
,
10820 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
10827 rtx end_2_label
= gen_label_rtx ();
10828 /* Is zero in the first two bytes? */
10830 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10831 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10832 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
10833 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10834 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
10836 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10837 JUMP_LABEL (tmp
) = end_2_label
;
10839 /* Not in the first two. Move two bytes forward. */
10840 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
10842 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
10844 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
10846 emit_label (end_2_label
);
10850 /* Avoid branch in fixing the byte. */
10851 tmpreg
= gen_lowpart (QImode
, tmpreg
);
10852 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
10854 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
10856 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
10858 emit_label (end_0_label
);
10862 ix86_expand_call (retval
, fnaddr
, callarg1
, callarg2
, pop
)
10863 rtx retval
, fnaddr
, callarg1
, callarg2
, pop
;
10865 rtx use
= NULL
, call
;
10867 if (pop
== const0_rtx
)
10869 if (TARGET_64BIT
&& pop
)
10873 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
10874 fnaddr
= machopic_indirect_call_target (fnaddr
);
10876 /* Static functions and indirect calls don't need the pic register. */
10877 if (! TARGET_64BIT
&& flag_pic
10878 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
10879 && ! SYMBOL_REF_FLAG (XEXP (fnaddr
, 0)))
10880 use_reg (&use
, pic_offset_table_rtx
);
10882 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
10884 rtx al
= gen_rtx_REG (QImode
, 0);
10885 emit_move_insn (al
, callarg2
);
10886 use_reg (&use
, al
);
10888 #endif /* TARGET_MACHO */
10890 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
10892 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10893 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
10896 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
10898 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
10901 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
10902 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
10903 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
10906 call
= emit_call_insn (call
);
10908 CALL_INSN_FUNCTION_USAGE (call
) = use
;
10912 /* Clear stack slot assignments remembered from previous functions.
10913 This is called from INIT_EXPANDERS once before RTL is emitted for each
10916 static struct machine_function
*
10917 ix86_init_machine_status ()
10919 return ggc_alloc_cleared (sizeof (struct machine_function
));
10922 /* Return a MEM corresponding to a stack slot with mode MODE.
10923 Allocate a new slot if necessary.
10925 The RTL for a function can have several slots available: N is
10926 which slot to use. */
10929 assign_386_stack_local (mode
, n
)
10930 enum machine_mode mode
;
10933 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
10936 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
10937 ix86_stack_locals
[(int) mode
][n
]
10938 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
10940 return ix86_stack_locals
[(int) mode
][n
];
10943 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10945 static GTY(()) rtx ix86_tls_symbol
;
10947 ix86_tls_get_addr ()
10950 if (!ix86_tls_symbol
)
10952 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
10953 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
10954 ? "___tls_get_addr"
10955 : "__tls_get_addr");
10958 return ix86_tls_symbol
;
10961 /* Calculate the length of the memory address in the instruction
10962 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10965 memory_address_length (addr
)
10968 struct ix86_address parts
;
10969 rtx base
, index
, disp
;
10972 if (GET_CODE (addr
) == PRE_DEC
10973 || GET_CODE (addr
) == POST_INC
10974 || GET_CODE (addr
) == PRE_MODIFY
10975 || GET_CODE (addr
) == POST_MODIFY
)
10978 if (! ix86_decompose_address (addr
, &parts
))
10982 index
= parts
.index
;
10986 /* Register Indirect. */
10987 if (base
&& !index
&& !disp
)
10989 /* Special cases: ebp and esp need the two-byte modrm form. */
10990 if (addr
== stack_pointer_rtx
10991 || addr
== arg_pointer_rtx
10992 || addr
== frame_pointer_rtx
10993 || addr
== hard_frame_pointer_rtx
)
10997 /* Direct Addressing. */
10998 else if (disp
&& !base
&& !index
)
11003 /* Find the length of the displacement constant. */
11006 if (GET_CODE (disp
) == CONST_INT
11007 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
11013 /* An index requires the two-byte modrm form. */
11021 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11022 is set, expect that insn have 8bit immediate alternative. */
11024 ix86_attr_length_immediate_default (insn
, shortform
)
11030 extract_insn_cached (insn
);
11031 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11032 if (CONSTANT_P (recog_data
.operand
[i
]))
11037 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
11038 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
11042 switch (get_attr_mode (insn
))
11053 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11058 fatal_insn ("unknown insn mode", insn
);
11064 /* Compute default value for "length_address" attribute. */
11066 ix86_attr_length_address_default (insn
)
11070 extract_insn_cached (insn
);
11071 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11072 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11074 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
11080 /* Return the maximum number of instructions a cpu can issue. */
11087 case PROCESSOR_PENTIUM
:
11091 case PROCESSOR_PENTIUMPRO
:
11092 case PROCESSOR_PENTIUM4
:
11093 case PROCESSOR_ATHLON
:
11101 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11102 by DEP_INSN and nothing set by DEP_INSN. */
11105 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
11106 rtx insn
, dep_insn
;
11107 enum attr_type insn_type
;
11111 /* Simplify the test for uninteresting insns. */
11112 if (insn_type
!= TYPE_SETCC
11113 && insn_type
!= TYPE_ICMOV
11114 && insn_type
!= TYPE_FCMOV
11115 && insn_type
!= TYPE_IBR
)
11118 if ((set
= single_set (dep_insn
)) != 0)
11120 set
= SET_DEST (set
);
11123 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
11124 && XVECLEN (PATTERN (dep_insn
), 0) == 2
11125 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
11126 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
11128 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11129 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11134 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
11137 /* This test is true if the dependent insn reads the flags but
11138 not any other potentially set register. */
11139 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
11142 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
11148 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11149 address with operands set by DEP_INSN. */
11152 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
11153 rtx insn
, dep_insn
;
11154 enum attr_type insn_type
;
11158 if (insn_type
== TYPE_LEA
11161 addr
= PATTERN (insn
);
11162 if (GET_CODE (addr
) == SET
)
11164 else if (GET_CODE (addr
) == PARALLEL
11165 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
11166 addr
= XVECEXP (addr
, 0, 0);
11169 addr
= SET_SRC (addr
);
11174 extract_insn_cached (insn
);
11175 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11176 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11178 addr
= XEXP (recog_data
.operand
[i
], 0);
11185 return modified_in_p (addr
, dep_insn
);
11189 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
11190 rtx insn
, link
, dep_insn
;
11193 enum attr_type insn_type
, dep_insn_type
;
11194 enum attr_memory memory
, dep_memory
;
11196 int dep_insn_code_number
;
11198 /* Anti and output depenancies have zero cost on all CPUs. */
11199 if (REG_NOTE_KIND (link
) != 0)
11202 dep_insn_code_number
= recog_memoized (dep_insn
);
11204 /* If we can't recognize the insns, we can't really do anything. */
11205 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
11208 insn_type
= get_attr_type (insn
);
11209 dep_insn_type
= get_attr_type (dep_insn
);
11213 case PROCESSOR_PENTIUM
:
11214 /* Address Generation Interlock adds a cycle of latency. */
11215 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11218 /* ??? Compares pair with jump/setcc. */
11219 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
11222 /* Floating point stores require value to be ready one cycle ealier. */
11223 if (insn_type
== TYPE_FMOV
11224 && get_attr_memory (insn
) == MEMORY_STORE
11225 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11229 case PROCESSOR_PENTIUMPRO
:
11230 memory
= get_attr_memory (insn
);
11231 dep_memory
= get_attr_memory (dep_insn
);
11233 /* Since we can't represent delayed latencies of load+operation,
11234 increase the cost here for non-imov insns. */
11235 if (dep_insn_type
!= TYPE_IMOV
11236 && dep_insn_type
!= TYPE_FMOV
11237 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
11240 /* INT->FP conversion is expensive. */
11241 if (get_attr_fp_int_src (dep_insn
))
11244 /* There is one cycle extra latency between an FP op and a store. */
11245 if (insn_type
== TYPE_FMOV
11246 && (set
= single_set (dep_insn
)) != NULL_RTX
11247 && (set2
= single_set (insn
)) != NULL_RTX
11248 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
11249 && GET_CODE (SET_DEST (set2
)) == MEM
)
11252 /* Show ability of reorder buffer to hide latency of load by executing
11253 in parallel with previous instruction in case
11254 previous instruction is not needed to compute the address. */
11255 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11256 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11258 /* Claim moves to take one cycle, as core can issue one load
11259 at time and the next load can start cycle later. */
11260 if (dep_insn_type
== TYPE_IMOV
11261 || dep_insn_type
== TYPE_FMOV
)
11269 memory
= get_attr_memory (insn
);
11270 dep_memory
= get_attr_memory (dep_insn
);
11271 /* The esp dependency is resolved before the instruction is really
11273 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
11274 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
11277 /* Since we can't represent delayed latencies of load+operation,
11278 increase the cost here for non-imov insns. */
11279 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11280 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
11282 /* INT->FP conversion is expensive. */
11283 if (get_attr_fp_int_src (dep_insn
))
11286 /* Show ability of reorder buffer to hide latency of load by executing
11287 in parallel with previous instruction in case
11288 previous instruction is not needed to compute the address. */
11289 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11290 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11292 /* Claim moves to take one cycle, as core can issue one load
11293 at time and the next load can start cycle later. */
11294 if (dep_insn_type
== TYPE_IMOV
11295 || dep_insn_type
== TYPE_FMOV
)
11304 case PROCESSOR_ATHLON
:
11305 memory
= get_attr_memory (insn
);
11306 dep_memory
= get_attr_memory (dep_insn
);
11308 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11310 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
11315 /* Show ability of reorder buffer to hide latency of load by executing
11316 in parallel with previous instruction in case
11317 previous instruction is not needed to compute the address. */
11318 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11319 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11321 /* Claim moves to take one cycle, as core can issue one load
11322 at time and the next load can start cycle later. */
11323 if (dep_insn_type
== TYPE_IMOV
11324 || dep_insn_type
== TYPE_FMOV
)
11326 else if (cost
>= 3)
11341 struct ppro_sched_data
11344 int issued_this_cycle
;
11348 static enum attr_ppro_uops
11349 ix86_safe_ppro_uops (insn
)
11352 if (recog_memoized (insn
) >= 0)
11353 return get_attr_ppro_uops (insn
);
11355 return PPRO_UOPS_MANY
;
11359 ix86_dump_ppro_packet (dump
)
11362 if (ix86_sched_data
.ppro
.decode
[0])
11364 fprintf (dump
, "PPRO packet: %d",
11365 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
11366 if (ix86_sched_data
.ppro
.decode
[1])
11367 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
11368 if (ix86_sched_data
.ppro
.decode
[2])
11369 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
11370 fputc ('\n', dump
);
11374 /* We're beginning a new block. Initialize data structures as necessary. */
11377 ix86_sched_init (dump
, sched_verbose
, veclen
)
11378 FILE *dump ATTRIBUTE_UNUSED
;
11379 int sched_verbose ATTRIBUTE_UNUSED
;
11380 int veclen ATTRIBUTE_UNUSED
;
11382 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
11385 /* Shift INSN to SLOT, and shift everything else down. */
11388 ix86_reorder_insn (insnp
, slot
)
11395 insnp
[0] = insnp
[1];
11396 while (++insnp
!= slot
);
11402 ix86_sched_reorder_ppro (ready
, e_ready
)
11407 enum attr_ppro_uops cur_uops
;
11408 int issued_this_cycle
;
11412 /* At this point .ppro.decode contains the state of the three
11413 decoders from last "cycle". That is, those insns that were
11414 actually independent. But here we're scheduling for the
11415 decoder, and we may find things that are decodable in the
11418 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
11419 issued_this_cycle
= 0;
11422 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11424 /* If the decoders are empty, and we've a complex insn at the
11425 head of the priority queue, let it issue without complaint. */
11426 if (decode
[0] == NULL
)
11428 if (cur_uops
== PPRO_UOPS_MANY
)
11430 decode
[0] = *insnp
;
11434 /* Otherwise, search for a 2-4 uop unsn to issue. */
11435 while (cur_uops
!= PPRO_UOPS_FEW
)
11437 if (insnp
== ready
)
11439 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11442 /* If so, move it to the head of the line. */
11443 if (cur_uops
== PPRO_UOPS_FEW
)
11444 ix86_reorder_insn (insnp
, e_ready
);
11446 /* Issue the head of the queue. */
11447 issued_this_cycle
= 1;
11448 decode
[0] = *e_ready
--;
11451 /* Look for simple insns to fill in the other two slots. */
11452 for (i
= 1; i
< 3; ++i
)
11453 if (decode
[i
] == NULL
)
11455 if (ready
> e_ready
)
11459 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11460 while (cur_uops
!= PPRO_UOPS_ONE
)
11462 if (insnp
== ready
)
11464 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11467 /* Found one. Move it to the head of the queue and issue it. */
11468 if (cur_uops
== PPRO_UOPS_ONE
)
11470 ix86_reorder_insn (insnp
, e_ready
);
11471 decode
[i
] = *e_ready
--;
11472 issued_this_cycle
++;
11476 /* ??? Didn't find one. Ideally, here we would do a lazy split
11477 of 2-uop insns, issue one and queue the other. */
11481 if (issued_this_cycle
== 0)
11482 issued_this_cycle
= 1;
11483 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
11486 /* We are about to being issuing insns for this clock cycle.
11487 Override the default sort algorithm to better slot instructions. */
11489 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
11490 FILE *dump ATTRIBUTE_UNUSED
;
11491 int sched_verbose ATTRIBUTE_UNUSED
;
11494 int clock_var ATTRIBUTE_UNUSED
;
11496 int n_ready
= *n_readyp
;
11497 rtx
*e_ready
= ready
+ n_ready
- 1;
11499 /* Make sure to go ahead and initialize key items in
11500 ix86_sched_data if we are not going to bother trying to
11501 reorder the ready queue. */
11504 ix86_sched_data
.ppro
.issued_this_cycle
= 1;
11513 case PROCESSOR_PENTIUMPRO
:
11514 ix86_sched_reorder_ppro (ready
, e_ready
);
11519 return ix86_issue_rate ();
11522 /* We are about to issue INSN. Return the number of insns left on the
11523 ready queue that can be issued this cycle. */
11526 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
11530 int can_issue_more
;
11536 return can_issue_more
- 1;
11538 case PROCESSOR_PENTIUMPRO
:
11540 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
11542 if (uops
== PPRO_UOPS_MANY
)
11545 ix86_dump_ppro_packet (dump
);
11546 ix86_sched_data
.ppro
.decode
[0] = insn
;
11547 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11548 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11550 ix86_dump_ppro_packet (dump
);
11551 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11553 else if (uops
== PPRO_UOPS_FEW
)
11556 ix86_dump_ppro_packet (dump
);
11557 ix86_sched_data
.ppro
.decode
[0] = insn
;
11558 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11559 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11563 for (i
= 0; i
< 3; ++i
)
11564 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
11566 ix86_sched_data
.ppro
.decode
[i
] = insn
;
11574 ix86_dump_ppro_packet (dump
);
11575 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11576 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11577 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11581 return --ix86_sched_data
.ppro
.issued_this_cycle
;
11586 ia32_use_dfa_pipeline_interface ()
11588 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11593 /* How many alternative schedules to try. This should be as wide as the
11594 scheduling freedom in the DFA, but no wider. Making this value too
11595 large results extra work for the scheduler. */
11598 ia32_multipass_dfa_lookahead ()
11600 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11607 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11608 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11612 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
11614 rtx dstref
, srcref
, dstreg
, srcreg
;
11618 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
11620 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
11624 /* Subroutine of above to actually do the updating by recursively walking
11628 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
11630 rtx dstref
, srcref
, dstreg
, srcreg
;
11632 enum rtx_code code
= GET_CODE (x
);
11633 const char *format_ptr
= GET_RTX_FORMAT (code
);
11636 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
11637 MEM_COPY_ATTRIBUTES (x
, dstref
);
11638 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
11639 MEM_COPY_ATTRIBUTES (x
, srcref
);
11641 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
11643 if (*format_ptr
== 'e')
11644 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
11646 else if (*format_ptr
== 'E')
11647 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
11648 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
11653 /* Compute the alignment given to a constant that is being placed in memory.
11654 EXP is the constant and ALIGN is the alignment that the object would
11656 The value of this function is used instead of that alignment to align
11660 ix86_constant_alignment (exp
, align
)
11664 if (TREE_CODE (exp
) == REAL_CST
)
11666 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11668 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11671 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
11678 /* Compute the alignment for a static variable.
11679 TYPE is the data type, and ALIGN is the alignment that
11680 the object would ordinarily have. The value of this function is used
11681 instead of that alignment to align the object. */
11684 ix86_data_alignment (type
, align
)
11688 if (AGGREGATE_TYPE_P (type
)
11689 && TYPE_SIZE (type
)
11690 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11691 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11692 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11695 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11696 to 16byte boundary. */
11699 if (AGGREGATE_TYPE_P (type
)
11700 && TYPE_SIZE (type
)
11701 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11702 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11703 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11707 if (TREE_CODE (type
) == ARRAY_TYPE
)
11709 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11711 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11714 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11717 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11719 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11722 else if ((TREE_CODE (type
) == RECORD_TYPE
11723 || TREE_CODE (type
) == UNION_TYPE
11724 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11725 && TYPE_FIELDS (type
))
11727 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11729 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11732 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11733 || TREE_CODE (type
) == INTEGER_TYPE
)
11735 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11737 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11744 /* Compute the alignment for a local variable.
11745 TYPE is the data type, and ALIGN is the alignment that
11746 the object would ordinarily have. The value of this macro is used
11747 instead of that alignment to align the object. */
11750 ix86_local_alignment (type
, align
)
11754 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11755 to 16byte boundary. */
11758 if (AGGREGATE_TYPE_P (type
)
11759 && TYPE_SIZE (type
)
11760 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11761 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
11762 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11765 if (TREE_CODE (type
) == ARRAY_TYPE
)
11767 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11769 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11772 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11774 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11776 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11779 else if ((TREE_CODE (type
) == RECORD_TYPE
11780 || TREE_CODE (type
) == UNION_TYPE
11781 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11782 && TYPE_FIELDS (type
))
11784 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11786 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11789 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11790 || TREE_CODE (type
) == INTEGER_TYPE
)
11793 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11795 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11801 /* Emit RTL insns to initialize the variable parts of a trampoline.
11802 FNADDR is an RTX for the address of the function's pure code.
11803 CXT is an RTX for the static chain value for the function. */
11805 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
11806 rtx tramp
, fnaddr
, cxt
;
11810 /* Compute offset from the end of the jmp to the target function. */
11811 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
11812 plus_constant (tramp
, 10),
11813 NULL_RTX
, 1, OPTAB_DIRECT
);
11814 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
11815 gen_int_mode (0xb9, QImode
));
11816 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
11817 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
11818 gen_int_mode (0xe9, QImode
));
11819 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
11824 /* Try to load address using shorter movl instead of movabs.
11825 We may want to support movq for kernel mode, but kernel does not use
11826 trampolines at the moment. */
11827 if (x86_64_zero_extended_value (fnaddr
))
11829 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
11830 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11831 gen_int_mode (0xbb41, HImode
));
11832 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
11833 gen_lowpart (SImode
, fnaddr
));
11838 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11839 gen_int_mode (0xbb49, HImode
));
11840 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11844 /* Load static chain using movabs to r10. */
11845 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11846 gen_int_mode (0xba49, HImode
));
11847 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11850 /* Jump to the r11 */
11851 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11852 gen_int_mode (0xff49, HImode
));
11853 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
11854 gen_int_mode (0xe3, QImode
));
11856 if (offset
> TRAMPOLINE_SIZE
)
11860 #ifdef TRANSFER_FROM_TRAMPOLINE
11861 emit_library_call (gen_rtx (SYMBOL_REF
, Pmode
, "__enable_execute_stack"),
11862 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register the builtin function NAME of type TYPE with md code CODE,
   but only when the current target flags enable the feature bit(s) in
   MASK.  Wrapped in do { } while (0) so it behaves as one statement.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
11873 struct builtin_description
11875 const unsigned int mask
;
11876 const enum insn_code icode
;
11877 const char *const name
;
11878 const enum ix86_builtins code
;
11879 const enum rtx_code comparison
;
11880 const unsigned int flag
;
11883 /* Used for builtins that are enabled both by -msse and -msse2. */
11884 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11886 static const struct builtin_description bdesc_comi
[] =
11888 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
11889 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
11890 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
11891 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
11892 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
11893 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
11894 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
11895 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
11896 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
11897 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
11898 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
11899 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
11900 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
11901 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
11902 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
11903 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
11904 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
11905 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
11906 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
11907 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
11908 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
11909 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
11910 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
11911 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
11914 static const struct builtin_description bdesc_2arg
[] =
11917 { MASK_SSE1
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
11918 { MASK_SSE1
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
11919 { MASK_SSE1
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
11920 { MASK_SSE1
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
11921 { MASK_SSE1
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
11922 { MASK_SSE1
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
11923 { MASK_SSE1
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
11924 { MASK_SSE1
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
11926 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
11927 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
11928 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
11929 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
11930 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
11931 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
11932 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
11933 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
11934 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
11935 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
11936 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
11937 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
11938 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
11939 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
11940 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
11941 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
11942 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
11943 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
11944 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
11945 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
11947 { MASK_SSE1
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
11948 { MASK_SSE1
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
11949 { MASK_SSE1
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
11950 { MASK_SSE1
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
11952 { MASK_SSE1
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
11953 { MASK_SSE1
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
11954 { MASK_SSE1
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
11955 { MASK_SSE1
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
11957 { MASK_SSE1
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
11958 { MASK_SSE1
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
11959 { MASK_SSE1
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
11960 { MASK_SSE1
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
11961 { MASK_SSE1
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
11964 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
11965 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
11966 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
11967 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
11968 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
11969 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
11971 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
11972 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11973 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11974 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11975 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11976 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11977 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11978 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11980 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11981 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11982 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11984 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11985 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11986 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11987 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11989 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11990 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11992 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11993 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11994 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11995 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11996 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11997 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11999 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
12000 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
12001 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
12002 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
12004 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
12005 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
12006 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
12007 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
12008 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
12009 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
12012 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12013 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12014 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12016 { MASK_SSE1
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12017 { MASK_SSE1
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12019 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12020 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12021 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12022 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12023 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12024 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12026 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12027 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12028 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12029 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12030 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12031 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12033 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12034 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12035 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12036 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12038 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12039 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12042 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12043 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12044 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12045 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12046 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12047 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12048 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12049 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12051 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12052 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12053 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12054 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12055 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12056 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12057 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12058 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12059 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12060 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12061 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12062 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12063 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12064 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12065 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12066 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12067 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12068 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12069 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12070 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12072 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12073 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12074 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12075 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12077 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12078 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12079 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12080 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12082 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12083 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12084 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12087 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12088 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12089 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12090 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12091 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12092 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12093 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12094 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12096 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12097 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12098 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12099 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12100 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12101 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12102 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12103 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12105 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12106 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12107 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
12108 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12110 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12111 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12112 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12113 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12115 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12116 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12118 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12119 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12120 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12121 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12122 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12123 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12125 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12126 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12127 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12128 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12130 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12131 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12132 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12133 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12134 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12135 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12136 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12137 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12139 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12140 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12141 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12143 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12144 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12146 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12147 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12148 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12149 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12150 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12151 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12153 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12154 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12155 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12156 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12157 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12158 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12160 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12161 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12162 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12163 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12165 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12167 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12168 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12169 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 }
12172 static const struct builtin_description bdesc_1arg
[] =
12174 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12175 { MASK_SSE1
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12177 { MASK_SSE1
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12178 { MASK_SSE1
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12179 { MASK_SSE1
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12181 { MASK_SSE1
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12182 { MASK_SSE1
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12183 { MASK_SSE1
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12184 { MASK_SSE1
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12186 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12187 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12188 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12189 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
12191 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12193 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12194 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12196 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12197 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12198 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12199 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12200 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12202 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12204 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12205 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12207 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12208 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12209 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12211 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 }
12215 ix86_init_builtins ()
12218 ix86_init_mmx_sse_builtins ();
12221 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12222 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12225 ix86_init_mmx_sse_builtins ()
12227 const struct builtin_description
* d
;
12230 tree pchar_type_node
= build_pointer_type (char_type_node
);
12231 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12232 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12233 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12234 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12237 tree int_ftype_v4sf_v4sf
12238 = build_function_type_list (integer_type_node
,
12239 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12240 tree v4si_ftype_v4sf_v4sf
12241 = build_function_type_list (V4SI_type_node
,
12242 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12243 /* MMX/SSE/integer conversions. */
12244 tree int_ftype_v4sf
12245 = build_function_type_list (integer_type_node
,
12246 V4SF_type_node
, NULL_TREE
);
12247 tree int_ftype_v8qi
12248 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12249 tree v4sf_ftype_v4sf_int
12250 = build_function_type_list (V4SF_type_node
,
12251 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12252 tree v4sf_ftype_v4sf_v2si
12253 = build_function_type_list (V4SF_type_node
,
12254 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12255 tree int_ftype_v4hi_int
12256 = build_function_type_list (integer_type_node
,
12257 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12258 tree v4hi_ftype_v4hi_int_int
12259 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12260 integer_type_node
, integer_type_node
,
12262 /* Miscellaneous. */
12263 tree v8qi_ftype_v4hi_v4hi
12264 = build_function_type_list (V8QI_type_node
,
12265 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12266 tree v4hi_ftype_v2si_v2si
12267 = build_function_type_list (V4HI_type_node
,
12268 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12269 tree v4sf_ftype_v4sf_v4sf_int
12270 = build_function_type_list (V4SF_type_node
,
12271 V4SF_type_node
, V4SF_type_node
,
12272 integer_type_node
, NULL_TREE
);
12273 tree v2si_ftype_v4hi_v4hi
12274 = build_function_type_list (V2SI_type_node
,
12275 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12276 tree v4hi_ftype_v4hi_int
12277 = build_function_type_list (V4HI_type_node
,
12278 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12279 tree v4hi_ftype_v4hi_di
12280 = build_function_type_list (V4HI_type_node
,
12281 V4HI_type_node
, long_long_unsigned_type_node
,
12283 tree v2si_ftype_v2si_di
12284 = build_function_type_list (V2SI_type_node
,
12285 V2SI_type_node
, long_long_unsigned_type_node
,
12287 tree void_ftype_void
12288 = build_function_type (void_type_node
, void_list_node
);
12289 tree void_ftype_unsigned
12290 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12291 tree unsigned_ftype_void
12292 = build_function_type (unsigned_type_node
, void_list_node
);
12294 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12295 tree v4sf_ftype_void
12296 = build_function_type (V4SF_type_node
, void_list_node
);
12297 tree v2si_ftype_v4sf
12298 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12299 /* Loads/stores. */
12300 tree void_ftype_v8qi_v8qi_pchar
12301 = build_function_type_list (void_type_node
,
12302 V8QI_type_node
, V8QI_type_node
,
12303 pchar_type_node
, NULL_TREE
);
12304 tree v4sf_ftype_pfloat
12305 = build_function_type_list (V4SF_type_node
, pfloat_type_node
, NULL_TREE
);
12306 /* @@@ the type is bogus */
12307 tree v4sf_ftype_v4sf_pv2si
12308 = build_function_type_list (V4SF_type_node
,
12309 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12310 tree void_ftype_pv2si_v4sf
12311 = build_function_type_list (void_type_node
,
12312 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12313 tree void_ftype_pfloat_v4sf
12314 = build_function_type_list (void_type_node
,
12315 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12316 tree void_ftype_pdi_di
12317 = build_function_type_list (void_type_node
,
12318 pdi_type_node
, long_long_unsigned_type_node
,
12320 tree void_ftype_pv2di_v2di
12321 = build_function_type_list (void_type_node
,
12322 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12323 /* Normal vector unops. */
12324 tree v4sf_ftype_v4sf
12325 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12327 /* Normal vector binops. */
12328 tree v4sf_ftype_v4sf_v4sf
12329 = build_function_type_list (V4SF_type_node
,
12330 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12331 tree v8qi_ftype_v8qi_v8qi
12332 = build_function_type_list (V8QI_type_node
,
12333 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12334 tree v4hi_ftype_v4hi_v4hi
12335 = build_function_type_list (V4HI_type_node
,
12336 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12337 tree v2si_ftype_v2si_v2si
12338 = build_function_type_list (V2SI_type_node
,
12339 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12340 tree di_ftype_di_di
12341 = build_function_type_list (long_long_unsigned_type_node
,
12342 long_long_unsigned_type_node
,
12343 long_long_unsigned_type_node
, NULL_TREE
);
12345 tree v2si_ftype_v2sf
12346 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12347 tree v2sf_ftype_v2si
12348 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12349 tree v2si_ftype_v2si
12350 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12351 tree v2sf_ftype_v2sf
12352 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12353 tree v2sf_ftype_v2sf_v2sf
12354 = build_function_type_list (V2SF_type_node
,
12355 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12356 tree v2si_ftype_v2sf_v2sf
12357 = build_function_type_list (V2SI_type_node
,
12358 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12359 tree pint_type_node
= build_pointer_type (integer_type_node
);
12360 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12361 tree int_ftype_v2df_v2df
12362 = build_function_type_list (integer_type_node
,
12363 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12366 = build_function_type (intTI_type_node
, void_list_node
);
12367 tree v2di_ftype_void
12368 = build_function_type (V2DI_type_node
, void_list_node
);
12369 tree ti_ftype_ti_ti
12370 = build_function_type_list (intTI_type_node
,
12371 intTI_type_node
, intTI_type_node
, NULL_TREE
);
12372 tree void_ftype_pvoid
12373 = build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
12375 = build_function_type_list (V2DI_type_node
,
12376 long_long_unsigned_type_node
, NULL_TREE
);
12378 = build_function_type_list (long_long_unsigned_type_node
,
12379 V2DI_type_node
, NULL_TREE
);
12380 tree v4sf_ftype_v4si
12381 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
12382 tree v4si_ftype_v4sf
12383 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
12384 tree v2df_ftype_v4si
12385 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
12386 tree v4si_ftype_v2df
12387 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
12388 tree v2si_ftype_v2df
12389 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
12390 tree v4sf_ftype_v2df
12391 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12392 tree v2df_ftype_v2si
12393 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
12394 tree v2df_ftype_v4sf
12395 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12396 tree int_ftype_v2df
12397 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
12398 tree v2df_ftype_v2df_int
12399 = build_function_type_list (V2DF_type_node
,
12400 V2DF_type_node
, integer_type_node
, NULL_TREE
);
12401 tree v4sf_ftype_v4sf_v2df
12402 = build_function_type_list (V4SF_type_node
,
12403 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12404 tree v2df_ftype_v2df_v4sf
12405 = build_function_type_list (V2DF_type_node
,
12406 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12407 tree v2df_ftype_v2df_v2df_int
12408 = build_function_type_list (V2DF_type_node
,
12409 V2DF_type_node
, V2DF_type_node
,
12412 tree v2df_ftype_v2df_pv2si
12413 = build_function_type_list (V2DF_type_node
,
12414 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
12415 tree void_ftype_pv2si_v2df
12416 = build_function_type_list (void_type_node
,
12417 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
12418 tree void_ftype_pdouble_v2df
12419 = build_function_type_list (void_type_node
,
12420 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
12421 tree void_ftype_pint_int
12422 = build_function_type_list (void_type_node
,
12423 pint_type_node
, integer_type_node
, NULL_TREE
);
12424 tree void_ftype_v16qi_v16qi_pchar
12425 = build_function_type_list (void_type_node
,
12426 V16QI_type_node
, V16QI_type_node
,
12427 pchar_type_node
, NULL_TREE
);
12428 tree v2df_ftype_pdouble
12429 = build_function_type_list (V2DF_type_node
, pdouble_type_node
, NULL_TREE
);
12430 tree v2df_ftype_v2df_v2df
12431 = build_function_type_list (V2DF_type_node
,
12432 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12433 tree v16qi_ftype_v16qi_v16qi
12434 = build_function_type_list (V16QI_type_node
,
12435 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12436 tree v8hi_ftype_v8hi_v8hi
12437 = build_function_type_list (V8HI_type_node
,
12438 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12439 tree v4si_ftype_v4si_v4si
12440 = build_function_type_list (V4SI_type_node
,
12441 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12442 tree v2di_ftype_v2di_v2di
12443 = build_function_type_list (V2DI_type_node
,
12444 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12445 tree v2di_ftype_v2df_v2df
12446 = build_function_type_list (V2DI_type_node
,
12447 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12448 tree v2df_ftype_v2df
12449 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12450 tree v2df_ftype_double
12451 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
12452 tree v2df_ftype_double_double
12453 = build_function_type_list (V2DF_type_node
,
12454 double_type_node
, double_type_node
, NULL_TREE
);
12455 tree int_ftype_v8hi_int
12456 = build_function_type_list (integer_type_node
,
12457 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12458 tree v8hi_ftype_v8hi_int_int
12459 = build_function_type_list (V8HI_type_node
,
12460 V8HI_type_node
, integer_type_node
,
12461 integer_type_node
, NULL_TREE
);
12462 tree v2di_ftype_v2di_int
12463 = build_function_type_list (V2DI_type_node
,
12464 V2DI_type_node
, integer_type_node
, NULL_TREE
);
12465 tree v4si_ftype_v4si_int
12466 = build_function_type_list (V4SI_type_node
,
12467 V4SI_type_node
, integer_type_node
, NULL_TREE
);
12468 tree v8hi_ftype_v8hi_int
12469 = build_function_type_list (V8HI_type_node
,
12470 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12471 tree v8hi_ftype_v8hi_v2di
12472 = build_function_type_list (V8HI_type_node
,
12473 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
12474 tree v4si_ftype_v4si_v2di
12475 = build_function_type_list (V4SI_type_node
,
12476 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
12477 tree v4si_ftype_v8hi_v8hi
12478 = build_function_type_list (V4SI_type_node
,
12479 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12480 tree di_ftype_v8qi_v8qi
12481 = build_function_type_list (long_long_unsigned_type_node
,
12482 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12483 tree v2di_ftype_v16qi_v16qi
12484 = build_function_type_list (V2DI_type_node
,
12485 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12486 tree int_ftype_v16qi
12487 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
12488 tree v16qi_ftype_pchar
12489 = build_function_type_list (V16QI_type_node
, pchar_type_node
, NULL_TREE
);
12490 tree void_ftype_pchar_v16qi
12491 = build_function_type_list (void_type_node
,
12492 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
12493 tree v4si_ftype_pchar
12494 = build_function_type_list (V4SI_type_node
, pchar_type_node
, NULL_TREE
);
12495 tree void_ftype_pchar_v4si
12496 = build_function_type_list (void_type_node
,
12497 pchar_type_node
, V4SI_type_node
, NULL_TREE
);
12498 tree v2di_ftype_v2di
12499 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12501 /* Add all builtins that are more or less simple operations on two
12503 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12505 /* Use one of the operands; the target can have a different mode for
12506 mask-generating compares. */
12507 enum machine_mode mode
;
12512 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12517 type
= v16qi_ftype_v16qi_v16qi
;
12520 type
= v8hi_ftype_v8hi_v8hi
;
12523 type
= v4si_ftype_v4si_v4si
;
12526 type
= v2di_ftype_v2di_v2di
;
12529 type
= v2df_ftype_v2df_v2df
;
12532 type
= ti_ftype_ti_ti
;
12535 type
= v4sf_ftype_v4sf_v4sf
;
12538 type
= v8qi_ftype_v8qi_v8qi
;
12541 type
= v4hi_ftype_v4hi_v4hi
;
12544 type
= v2si_ftype_v2si_v2si
;
12547 type
= di_ftype_di_di
;
12554 /* Override for comparisons. */
12555 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12556 || d
->icode
== CODE_FOR_maskncmpv4sf3
12557 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12558 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12559 type
= v4si_ftype_v4sf_v4sf
;
12561 if (d
->icode
== CODE_FOR_maskcmpv2df3
12562 || d
->icode
== CODE_FOR_maskncmpv2df3
12563 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12564 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12565 type
= v2di_ftype_v2df_v2df
;
12567 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12570 /* Add the remaining MMX insns with somewhat more complicated types. */
12571 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12572 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12573 def_builtin (MASK_MMX
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12574 def_builtin (MASK_MMX
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12575 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12576 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12577 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12579 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12580 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12581 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12583 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12584 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12586 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12587 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12589 /* comi/ucomi insns. */
12590 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12591 if (d
->mask
== MASK_SSE2
)
12592 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12594 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12596 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12597 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12598 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12600 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12601 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12602 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12603 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12604 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12605 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12607 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12608 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12610 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12612 def_builtin (MASK_SSE1
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
12613 def_builtin (MASK_SSE1
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
12614 def_builtin (MASK_SSE1
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
12615 def_builtin (MASK_SSE1
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12616 def_builtin (MASK_SSE1
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12617 def_builtin (MASK_SSE1
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12619 def_builtin (MASK_SSE1
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12620 def_builtin (MASK_SSE1
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12621 def_builtin (MASK_SSE1
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12622 def_builtin (MASK_SSE1
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12624 def_builtin (MASK_SSE1
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12625 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12626 def_builtin (MASK_SSE1
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12627 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12629 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12631 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12633 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12634 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12635 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12636 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12637 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12638 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12640 def_builtin (MASK_SSE1
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12642 /* Original 3DNow! */
12643 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12644 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12645 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12646 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12647 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12648 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12649 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12650 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12651 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12652 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12653 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12654 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12655 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12656 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12657 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12658 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12659 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12660 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12661 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12662 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12664 /* 3DNow! extension as used in the Athlon CPU. */
12665 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
12666 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
12667 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
12668 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
12669 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
12670 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
12672 def_builtin (MASK_SSE1
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
12675 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
12676 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
12678 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
12679 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
12680 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
12682 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADAPD
);
12683 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADUPD
);
12684 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADSD
);
12685 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
12686 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
12687 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
12689 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
12690 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
12691 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
12692 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
12694 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
12695 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
12696 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
12697 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
12698 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
12700 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
12701 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
12702 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
12703 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
12705 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
12706 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
12708 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
12710 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
12711 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
12713 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
12714 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
12715 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
12716 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
12717 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
12719 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
12721 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
12722 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
12724 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
12725 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
12726 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
12728 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
12729 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
12730 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
12732 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
12733 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
12734 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
12735 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pdouble
, IX86_BUILTIN_LOADPD1
);
12736 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADRPD
);
12737 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
12738 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
12740 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pvoid
, IX86_BUILTIN_CLFLUSH
);
12741 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
12742 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
12744 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pchar
, IX86_BUILTIN_LOADDQA
);
12745 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pchar
, IX86_BUILTIN_LOADDQU
);
12746 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pchar
, IX86_BUILTIN_LOADD
);
12747 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
12748 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
12749 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pchar_v4si
, IX86_BUILTIN_STORED
);
12750 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
12752 def_builtin (MASK_SSE1
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
12754 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
12755 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
12756 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
12758 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
12759 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
12760 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
12762 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
12763 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
12765 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
12766 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
12767 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
12768 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
12770 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
12771 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
12772 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
12773 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
12775 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
12776 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
12778 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
12781 /* Errors in the source file can cause expand_expr to return const0_rtx
12782 where we expect a vector. To avoid crashing, use one of the vector
12783 clear instructions. */
12785 safe_vector_operand (x
, mode
)
12787 enum machine_mode mode
;
12789 if (x
!= const0_rtx
)
12791 x
= gen_reg_rtx (mode
);
12793 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
12794 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
12795 : gen_rtx_SUBREG (DImode
, x
, 0)));
12797 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
12798 : gen_rtx_SUBREG (V4SFmode
, x
, 0)));
12802 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12805 ix86_expand_binop_builtin (icode
, arglist
, target
)
12806 enum insn_code icode
;
12811 tree arg0
= TREE_VALUE (arglist
);
12812 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12813 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12814 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12815 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12816 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12817 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
12819 if (VECTOR_MODE_P (mode0
))
12820 op0
= safe_vector_operand (op0
, mode0
);
12821 if (VECTOR_MODE_P (mode1
))
12822 op1
= safe_vector_operand (op1
, mode1
);
12825 || GET_MODE (target
) != tmode
12826 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12827 target
= gen_reg_rtx (tmode
);
12829 /* In case the insn wants input operands in modes different from
12830 the result, abort. */
12831 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
12834 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12835 op0
= copy_to_mode_reg (mode0
, op0
);
12836 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12837 op1
= copy_to_mode_reg (mode1
, op1
);
12839 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12840 yet one of the two must not be a memory. This is normally enforced
12841 by expanders, but we didn't bother to create one here. */
12842 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12843 op0
= copy_to_mode_reg (mode0
, op0
);
12845 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12852 /* Subroutine of ix86_expand_builtin to take care of stores. */
12855 ix86_expand_store_builtin (icode
, arglist
)
12856 enum insn_code icode
;
12860 tree arg0
= TREE_VALUE (arglist
);
12861 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12862 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12863 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12864 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
12865 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
12867 if (VECTOR_MODE_P (mode1
))
12868 op1
= safe_vector_operand (op1
, mode1
);
12870 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12872 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
12873 op1
= copy_to_mode_reg (mode1
, op1
);
12875 pat
= GEN_FCN (icode
) (op0
, op1
);
12881 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12884 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
12885 enum insn_code icode
;
12891 tree arg0
= TREE_VALUE (arglist
);
12892 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12893 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12894 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12897 || GET_MODE (target
) != tmode
12898 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12899 target
= gen_reg_rtx (tmode
);
12901 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12904 if (VECTOR_MODE_P (mode0
))
12905 op0
= safe_vector_operand (op0
, mode0
);
12907 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12908 op0
= copy_to_mode_reg (mode0
, op0
);
12911 pat
= GEN_FCN (icode
) (target
, op0
);
12918 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12919 sqrtss, rsqrtss, rcpss. */
12922 ix86_expand_unop1_builtin (icode
, arglist
, target
)
12923 enum insn_code icode
;
12928 tree arg0
= TREE_VALUE (arglist
);
12929 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12930 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12931 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12934 || GET_MODE (target
) != tmode
12935 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12936 target
= gen_reg_rtx (tmode
);
12938 if (VECTOR_MODE_P (mode0
))
12939 op0
= safe_vector_operand (op0
, mode0
);
12941 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12942 op0
= copy_to_mode_reg (mode0
, op0
);
12945 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
12946 op1
= copy_to_mode_reg (mode0
, op1
);
12948 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12955 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12958 ix86_expand_sse_compare (d
, arglist
, target
)
12959 const struct builtin_description
*d
;
12964 tree arg0
= TREE_VALUE (arglist
);
12965 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12966 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12967 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12969 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
12970 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
12971 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
12972 enum rtx_code comparison
= d
->comparison
;
12974 if (VECTOR_MODE_P (mode0
))
12975 op0
= safe_vector_operand (op0
, mode0
);
12976 if (VECTOR_MODE_P (mode1
))
12977 op1
= safe_vector_operand (op1
, mode1
);
12979 /* Swap operands if we have a comparison that isn't available in
12983 rtx tmp
= gen_reg_rtx (mode1
);
12984 emit_move_insn (tmp
, op1
);
12990 || GET_MODE (target
) != tmode
12991 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
12992 target
= gen_reg_rtx (tmode
);
12994 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
12995 op0
= copy_to_mode_reg (mode0
, op0
);
12996 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
12997 op1
= copy_to_mode_reg (mode1
, op1
);
12999 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13000 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
13007 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13010 ix86_expand_sse_comi (d
, arglist
, target
)
13011 const struct builtin_description
*d
;
13016 tree arg0
= TREE_VALUE (arglist
);
13017 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13018 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13019 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13021 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
13022 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
13023 enum rtx_code comparison
= d
->comparison
;
13025 if (VECTOR_MODE_P (mode0
))
13026 op0
= safe_vector_operand (op0
, mode0
);
13027 if (VECTOR_MODE_P (mode1
))
13028 op1
= safe_vector_operand (op1
, mode1
);
13030 /* Swap operands if we have a comparison that isn't available in
13039 target
= gen_reg_rtx (SImode
);
13040 emit_move_insn (target
, const0_rtx
);
13041 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13043 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13044 op0
= copy_to_mode_reg (mode0
, op0
);
13045 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13046 op1
= copy_to_mode_reg (mode1
, op1
);
13048 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13049 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13053 emit_insn (gen_rtx_SET (VOIDmode
,
13054 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13055 gen_rtx_fmt_ee (comparison
, QImode
,
13059 return SUBREG_REG (target
);
13062 /* Expand an expression EXP that calls a built-in function,
13063 with result going to TARGET if that's convenient
13064 (and in mode MODE if that's convenient).
13065 SUBTARGET may be used as the target for computing one of EXP's operands.
13066 IGNORE is nonzero if the value is to be ignored. */
13069 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
13072 rtx subtarget ATTRIBUTE_UNUSED
;
13073 enum machine_mode mode ATTRIBUTE_UNUSED
;
13074 int ignore ATTRIBUTE_UNUSED
;
13076 const struct builtin_description
*d
;
13078 enum insn_code icode
;
13079 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
13080 tree arglist
= TREE_OPERAND (exp
, 1);
13081 tree arg0
, arg1
, arg2
;
13082 rtx op0
, op1
, op2
, pat
;
13083 enum machine_mode tmode
, mode0
, mode1
, mode2
;
13084 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
13088 case IX86_BUILTIN_EMMS
:
13089 emit_insn (gen_emms ());
13092 case IX86_BUILTIN_SFENCE
:
13093 emit_insn (gen_sfence ());
13096 case IX86_BUILTIN_PEXTRW
:
13097 case IX86_BUILTIN_PEXTRW128
:
13098 icode
= (fcode
== IX86_BUILTIN_PEXTRW
13099 ? CODE_FOR_mmx_pextrw
13100 : CODE_FOR_sse2_pextrw
);
13101 arg0
= TREE_VALUE (arglist
);
13102 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13103 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13104 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13105 tmode
= insn_data
[icode
].operand
[0].mode
;
13106 mode0
= insn_data
[icode
].operand
[1].mode
;
13107 mode1
= insn_data
[icode
].operand
[2].mode
;
13109 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13110 op0
= copy_to_mode_reg (mode0
, op0
);
13111 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13113 /* @@@ better error message */
13114 error ("selector must be an immediate");
13115 return gen_reg_rtx (tmode
);
13118 || GET_MODE (target
) != tmode
13119 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13120 target
= gen_reg_rtx (tmode
);
13121 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13127 case IX86_BUILTIN_PINSRW
:
13128 case IX86_BUILTIN_PINSRW128
:
13129 icode
= (fcode
== IX86_BUILTIN_PINSRW
13130 ? CODE_FOR_mmx_pinsrw
13131 : CODE_FOR_sse2_pinsrw
);
13132 arg0
= TREE_VALUE (arglist
);
13133 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13134 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13135 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13136 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13137 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13138 tmode
= insn_data
[icode
].operand
[0].mode
;
13139 mode0
= insn_data
[icode
].operand
[1].mode
;
13140 mode1
= insn_data
[icode
].operand
[2].mode
;
13141 mode2
= insn_data
[icode
].operand
[3].mode
;
13143 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13144 op0
= copy_to_mode_reg (mode0
, op0
);
13145 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13146 op1
= copy_to_mode_reg (mode1
, op1
);
13147 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13149 /* @@@ better error message */
13150 error ("selector must be an immediate");
13154 || GET_MODE (target
) != tmode
13155 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13156 target
= gen_reg_rtx (tmode
);
13157 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13163 case IX86_BUILTIN_MASKMOVQ
:
13164 case IX86_BUILTIN_MASKMOVDQU
:
13165 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
13166 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
13167 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
13168 : CODE_FOR_sse2_maskmovdqu
));
13169 /* Note the arg order is different from the operand order. */
13170 arg1
= TREE_VALUE (arglist
);
13171 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13172 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13173 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13174 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13175 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13176 mode0
= insn_data
[icode
].operand
[0].mode
;
13177 mode1
= insn_data
[icode
].operand
[1].mode
;
13178 mode2
= insn_data
[icode
].operand
[2].mode
;
13180 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13181 op0
= copy_to_mode_reg (mode0
, op0
);
13182 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13183 op1
= copy_to_mode_reg (mode1
, op1
);
13184 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13185 op2
= copy_to_mode_reg (mode2
, op2
);
13186 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
13192 case IX86_BUILTIN_SQRTSS
:
13193 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
13194 case IX86_BUILTIN_RSQRTSS
:
13195 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
13196 case IX86_BUILTIN_RCPSS
:
13197 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
13199 case IX86_BUILTIN_LOADAPS
:
13200 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
13202 case IX86_BUILTIN_LOADUPS
:
13203 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13205 case IX86_BUILTIN_STOREAPS
:
13206 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
13208 case IX86_BUILTIN_STOREUPS
:
13209 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13211 case IX86_BUILTIN_LOADSS
:
13212 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13214 case IX86_BUILTIN_STORESS
:
13215 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
13217 case IX86_BUILTIN_LOADHPS
:
13218 case IX86_BUILTIN_LOADLPS
:
13219 case IX86_BUILTIN_LOADHPD
:
13220 case IX86_BUILTIN_LOADLPD
:
13221 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
13222 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
13223 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
13224 : CODE_FOR_sse2_movlpd
);
13225 arg0
= TREE_VALUE (arglist
);
13226 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13227 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13228 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13229 tmode
= insn_data
[icode
].operand
[0].mode
;
13230 mode0
= insn_data
[icode
].operand
[1].mode
;
13231 mode1
= insn_data
[icode
].operand
[2].mode
;
13233 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13234 op0
= copy_to_mode_reg (mode0
, op0
);
13235 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13237 || GET_MODE (target
) != tmode
13238 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13239 target
= gen_reg_rtx (tmode
);
13240 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13246 case IX86_BUILTIN_STOREHPS
:
13247 case IX86_BUILTIN_STORELPS
:
13248 case IX86_BUILTIN_STOREHPD
:
13249 case IX86_BUILTIN_STORELPD
:
13250 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
13251 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
13252 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
13253 : CODE_FOR_sse2_movlpd
);
13254 arg0
= TREE_VALUE (arglist
);
13255 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13256 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13257 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13258 mode0
= insn_data
[icode
].operand
[1].mode
;
13259 mode1
= insn_data
[icode
].operand
[2].mode
;
13261 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13262 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13263 op1
= copy_to_mode_reg (mode1
, op1
);
13265 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
13271 case IX86_BUILTIN_MOVNTPS
:
13272 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13273 case IX86_BUILTIN_MOVNTQ
:
13274 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
13276 case IX86_BUILTIN_LDMXCSR
:
13277 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13278 target
= assign_386_stack_local (SImode
, 0);
13279 emit_move_insn (target
, op0
);
13280 emit_insn (gen_ldmxcsr (target
));
13283 case IX86_BUILTIN_STMXCSR
:
13284 target
= assign_386_stack_local (SImode
, 0);
13285 emit_insn (gen_stmxcsr (target
));
13286 return copy_to_mode_reg (SImode
, target
);
13288 case IX86_BUILTIN_SHUFPS
:
13289 case IX86_BUILTIN_SHUFPD
:
13290 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13291 ? CODE_FOR_sse_shufps
13292 : CODE_FOR_sse2_shufpd
);
13293 arg0
= TREE_VALUE (arglist
);
13294 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13295 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13296 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13297 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13298 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13299 tmode
= insn_data
[icode
].operand
[0].mode
;
13300 mode0
= insn_data
[icode
].operand
[1].mode
;
13301 mode1
= insn_data
[icode
].operand
[2].mode
;
13302 mode2
= insn_data
[icode
].operand
[3].mode
;
13304 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13305 op0
= copy_to_mode_reg (mode0
, op0
);
13306 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13307 op1
= copy_to_mode_reg (mode1
, op1
);
13308 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13310 /* @@@ better error message */
13311 error ("mask must be an immediate");
13312 return gen_reg_rtx (tmode
);
13315 || GET_MODE (target
) != tmode
13316 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13317 target
= gen_reg_rtx (tmode
);
13318 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13324 case IX86_BUILTIN_PSHUFW
:
13325 case IX86_BUILTIN_PSHUFD
:
13326 case IX86_BUILTIN_PSHUFHW
:
13327 case IX86_BUILTIN_PSHUFLW
:
13328 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13329 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13330 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13331 : CODE_FOR_mmx_pshufw
);
13332 arg0
= TREE_VALUE (arglist
);
13333 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13334 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13335 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13336 tmode
= insn_data
[icode
].operand
[0].mode
;
13337 mode1
= insn_data
[icode
].operand
[1].mode
;
13338 mode2
= insn_data
[icode
].operand
[2].mode
;
13340 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13341 op0
= copy_to_mode_reg (mode1
, op0
);
13342 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13344 /* @@@ better error message */
13345 error ("mask must be an immediate");
13349 || GET_MODE (target
) != tmode
13350 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13351 target
= gen_reg_rtx (tmode
);
13352 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13358 case IX86_BUILTIN_PSLLDQI128
:
13359 case IX86_BUILTIN_PSRLDQI128
:
13360 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
13361 : CODE_FOR_sse2_lshrti3
);
13362 arg0
= TREE_VALUE (arglist
);
13363 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13364 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13365 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13366 tmode
= insn_data
[icode
].operand
[0].mode
;
13367 mode1
= insn_data
[icode
].operand
[1].mode
;
13368 mode2
= insn_data
[icode
].operand
[2].mode
;
13370 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13372 op0
= copy_to_reg (op0
);
13373 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
13375 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13377 error ("shift must be an immediate");
13380 target
= gen_reg_rtx (V2DImode
);
13381 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
13387 case IX86_BUILTIN_FEMMS
:
13388 emit_insn (gen_femms ());
13391 case IX86_BUILTIN_PAVGUSB
:
13392 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
13394 case IX86_BUILTIN_PF2ID
:
13395 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
13397 case IX86_BUILTIN_PFACC
:
13398 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
13400 case IX86_BUILTIN_PFADD
:
13401 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
13403 case IX86_BUILTIN_PFCMPEQ
:
13404 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
13406 case IX86_BUILTIN_PFCMPGE
:
13407 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
13409 case IX86_BUILTIN_PFCMPGT
:
13410 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
13412 case IX86_BUILTIN_PFMAX
:
13413 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
13415 case IX86_BUILTIN_PFMIN
:
13416 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
13418 case IX86_BUILTIN_PFMUL
:
13419 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
13421 case IX86_BUILTIN_PFRCP
:
13422 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
13424 case IX86_BUILTIN_PFRCPIT1
:
13425 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
13427 case IX86_BUILTIN_PFRCPIT2
:
13428 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
13430 case IX86_BUILTIN_PFRSQIT1
:
13431 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
13433 case IX86_BUILTIN_PFRSQRT
:
13434 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
13436 case IX86_BUILTIN_PFSUB
:
13437 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
13439 case IX86_BUILTIN_PFSUBR
:
13440 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
13442 case IX86_BUILTIN_PI2FD
:
13443 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
13445 case IX86_BUILTIN_PMULHRW
:
13446 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
13448 case IX86_BUILTIN_PF2IW
:
13449 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
13451 case IX86_BUILTIN_PFNACC
:
13452 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
13454 case IX86_BUILTIN_PFPNACC
:
13455 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
13457 case IX86_BUILTIN_PI2FW
:
13458 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
13460 case IX86_BUILTIN_PSWAPDSI
:
13461 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
13463 case IX86_BUILTIN_PSWAPDSF
:
13464 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
13466 case IX86_BUILTIN_SSE_ZERO
:
13467 target
= gen_reg_rtx (V4SFmode
);
13468 emit_insn (gen_sse_clrv4sf (target
));
13471 case IX86_BUILTIN_MMX_ZERO
:
13472 target
= gen_reg_rtx (DImode
);
13473 emit_insn (gen_mmx_clrdi (target
));
13476 case IX86_BUILTIN_CLRTI
:
13477 target
= gen_reg_rtx (V2DImode
);
13478 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
13482 case IX86_BUILTIN_SQRTSD
:
13483 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
13484 case IX86_BUILTIN_LOADAPD
:
13485 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
13486 case IX86_BUILTIN_LOADUPD
:
13487 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13489 case IX86_BUILTIN_STOREAPD
:
13490 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13491 case IX86_BUILTIN_STOREUPD
:
13492 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13494 case IX86_BUILTIN_LOADSD
:
13495 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13497 case IX86_BUILTIN_STORESD
:
13498 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
13500 case IX86_BUILTIN_SETPD1
:
13501 target
= assign_386_stack_local (DFmode
, 0);
13502 arg0
= TREE_VALUE (arglist
);
13503 emit_move_insn (adjust_address (target
, DFmode
, 0),
13504 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13505 op0
= gen_reg_rtx (V2DFmode
);
13506 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13507 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, GEN_INT (0)));
13510 case IX86_BUILTIN_SETPD
:
13511 target
= assign_386_stack_local (V2DFmode
, 0);
13512 arg0
= TREE_VALUE (arglist
);
13513 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13514 emit_move_insn (adjust_address (target
, DFmode
, 0),
13515 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13516 emit_move_insn (adjust_address (target
, DFmode
, 8),
13517 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13518 op0
= gen_reg_rtx (V2DFmode
);
13519 emit_insn (gen_sse2_movapd (op0
, target
));
13522 case IX86_BUILTIN_LOADRPD
:
13523 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
13524 gen_reg_rtx (V2DFmode
), 1);
13525 emit_insn (gen_sse2_shufpd (target
, target
, target
, GEN_INT (1)));
13528 case IX86_BUILTIN_LOADPD1
:
13529 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13530 gen_reg_rtx (V2DFmode
), 1);
13531 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13534 case IX86_BUILTIN_STOREPD1
:
13535 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13536 case IX86_BUILTIN_STORERPD
:
13537 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13539 case IX86_BUILTIN_CLRPD
:
13540 target
= gen_reg_rtx (V2DFmode
);
13541 emit_insn (gen_sse_clrv2df (target
));
13544 case IX86_BUILTIN_MFENCE
:
13545 emit_insn (gen_sse2_mfence ());
13547 case IX86_BUILTIN_LFENCE
:
13548 emit_insn (gen_sse2_lfence ());
13551 case IX86_BUILTIN_CLFLUSH
:
13552 arg0
= TREE_VALUE (arglist
);
13553 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13554 icode
= CODE_FOR_sse2_clflush
;
13555 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
13556 op0
= copy_to_mode_reg (Pmode
, op0
);
13558 emit_insn (gen_sse2_clflush (op0
));
13561 case IX86_BUILTIN_MOVNTPD
:
13562 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13563 case IX86_BUILTIN_MOVNTDQ
:
13564 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13565 case IX86_BUILTIN_MOVNTI
:
13566 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13568 case IX86_BUILTIN_LOADDQA
:
13569 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
13570 case IX86_BUILTIN_LOADDQU
:
13571 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
13572 case IX86_BUILTIN_LOADD
:
13573 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
13575 case IX86_BUILTIN_STOREDQA
:
13576 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
13577 case IX86_BUILTIN_STOREDQU
:
13578 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
13579 case IX86_BUILTIN_STORED
:
13580 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
13586 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13587 if (d
->code
== fcode
)
13589 /* Compares are treated specially. */
13590 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13591 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13592 || d
->icode
== CODE_FOR_maskncmpv4sf3
13593 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
13594 || d
->icode
== CODE_FOR_maskcmpv2df3
13595 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13596 || d
->icode
== CODE_FOR_maskncmpv2df3
13597 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13598 return ix86_expand_sse_compare (d
, arglist
, target
);
13600 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
13603 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
13604 if (d
->code
== fcode
)
13605 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
13607 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13608 if (d
->code
== fcode
)
13609 return ix86_expand_sse_comi (d
, arglist
, target
);
13611 /* @@@ Should really do something sensible here. */
13615 /* Store OPERAND to the memory after reload is completed. This means
13616 that we can't easily use assign_stack_local. */
13618 ix86_force_to_memory (mode
, operand
)
13619 enum machine_mode mode
;
13623 if (!reload_completed
)
13625 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
13627 result
= gen_rtx_MEM (mode
,
13628 gen_rtx_PLUS (Pmode
,
13630 GEN_INT (-RED_ZONE_SIZE
)));
13631 emit_move_insn (result
, operand
);
13633 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
13639 operand
= gen_lowpart (DImode
, operand
);
13643 gen_rtx_SET (VOIDmode
,
13644 gen_rtx_MEM (DImode
,
13645 gen_rtx_PRE_DEC (DImode
,
13646 stack_pointer_rtx
)),
13652 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13661 split_di (&operand
, 1, operands
, operands
+ 1);
13663 gen_rtx_SET (VOIDmode
,
13664 gen_rtx_MEM (SImode
,
13665 gen_rtx_PRE_DEC (Pmode
,
13666 stack_pointer_rtx
)),
13669 gen_rtx_SET (VOIDmode
,
13670 gen_rtx_MEM (SImode
,
13671 gen_rtx_PRE_DEC (Pmode
,
13672 stack_pointer_rtx
)),
13677 /* It is better to store HImodes as SImodes. */
13678 if (!TARGET_PARTIAL_REG_STALL
)
13679 operand
= gen_lowpart (SImode
, operand
);
13683 gen_rtx_SET (VOIDmode
,
13684 gen_rtx_MEM (GET_MODE (operand
),
13685 gen_rtx_PRE_DEC (SImode
,
13686 stack_pointer_rtx
)),
13692 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13697 /* Free operand from the memory. */
13699 ix86_free_from_memory (mode
)
13700 enum machine_mode mode
;
13702 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
13706 if (mode
== DImode
|| TARGET_64BIT
)
13708 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
13712 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13713 to pop or add instruction if registers are available. */
13714 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13715 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
13720 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13721 QImode must go into class Q_REGS.
13722 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13723 movdf to do mem-to-mem moves through integer regs. */
13725 ix86_preferred_reload_class (x
, class)
13727 enum reg_class
class;
13729 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
13731 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
13733 /* SSE can't load any constant directly yet. */
13734 if (SSE_CLASS_P (class))
13736 /* Floats can load 0 and 1. */
13737 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
13739 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13740 if (MAYBE_SSE_CLASS_P (class))
13741 return (reg_class_subset_p (class, GENERAL_REGS
)
13742 ? GENERAL_REGS
: FLOAT_REGS
);
13746 /* General regs can load everything. */
13747 if (reg_class_subset_p (class, GENERAL_REGS
))
13748 return GENERAL_REGS
;
13749 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13750 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13753 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
13755 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
13760 /* If we are copying between general and FP registers, we need a memory
13761 location. The same is true for SSE and MMX registers.
13763 The macro can't work reliably when one of the CLASSES is class containing
13764 registers from multiple units (SSE, MMX, integer). We avoid this by never
13765 combining those units in single alternative in the machine description.
13766 Ensure that this constraint holds to avoid unexpected surprises.
13768 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13769 enforce these sanity checks. */
13771 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
13772 enum reg_class class1
, class2
;
13773 enum machine_mode mode
;
13776 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
13777 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
13778 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
13779 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
13780 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
13781 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
13788 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
13789 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
13790 && (mode
) != SImode
)
13791 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13792 && (mode
) != SImode
));
13794 /* Return the cost of moving data from a register in class CLASS1 to
13795 one in class CLASS2.
13797 It is not required that the cost always equal 2 when FROM is the same as TO;
13798 on some machines it is expensive to move between registers if they are not
13799 general registers. */
13801 ix86_register_move_cost (mode
, class1
, class2
)
13802 enum machine_mode mode
;
13803 enum reg_class class1
, class2
;
13805 /* In case we require secondary memory, compute cost of the store followed
13806 by load. In order to avoid bad register allocation choices, we need
13807 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13809 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
13813 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
13814 MEMORY_MOVE_COST (mode
, class1
, 1));
13815 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
13816 MEMORY_MOVE_COST (mode
, class2
, 1));
13818 /* In case of copying from general_purpose_register we may emit multiple
13819 stores followed by single load causing memory size mismatch stall.
13820 Count this as arbitarily high cost of 20. */
13821 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
13824 /* In the case of FP/MMX moves, the registers actually overlap, and we
13825 have to switch modes in order to treat them differently. */
13826 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
13827 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
13833 /* Moves between SSE/MMX and integer unit are expensive. */
13834 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13835 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
13836 return ix86_cost
->mmxsse_to_integer
;
13837 if (MAYBE_FLOAT_CLASS_P (class1
))
13838 return ix86_cost
->fp_move
;
13839 if (MAYBE_SSE_CLASS_P (class1
))
13840 return ix86_cost
->sse_move
;
13841 if (MAYBE_MMX_CLASS_P (class1
))
13842 return ix86_cost
->mmx_move
;
13846 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13848 ix86_hard_regno_mode_ok (regno
, mode
)
13850 enum machine_mode mode
;
13852 /* Flags and only flags can only hold CCmode values. */
13853 if (CC_REGNO_P (regno
))
13854 return GET_MODE_CLASS (mode
) == MODE_CC
;
13855 if (GET_MODE_CLASS (mode
) == MODE_CC
13856 || GET_MODE_CLASS (mode
) == MODE_RANDOM
13857 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
13859 if (FP_REGNO_P (regno
))
13860 return VALID_FP_MODE_P (mode
);
13861 if (SSE_REGNO_P (regno
))
13862 return VALID_SSE_REG_MODE (mode
);
13863 if (MMX_REGNO_P (regno
))
13864 return VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
);
13865 /* We handle both integer and floats in the general purpose registers.
13866 In future we should be able to handle vector modes as well. */
13867 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
13869 /* Take care for QImode values - they can be in non-QI regs, but then
13870 they do cause partial register stalls. */
13871 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
13873 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
13876 /* Return the cost of moving data of mode M between a
13877 register and memory. A value of 2 is the default; this cost is
13878 relative to those in `REGISTER_MOVE_COST'.
13880 If moving between registers and memory is more expensive than
13881 between two registers, you should define this macro to express the
13884 Model also increased moving costs of QImode registers in non
13888 ix86_memory_move_cost (mode
, class, in
)
13889 enum machine_mode mode
;
13890 enum reg_class
class;
13893 if (FLOAT_CLASS_P (class))
13911 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
13913 if (SSE_CLASS_P (class))
13916 switch (GET_MODE_SIZE (mode
))
13930 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
13932 if (MMX_CLASS_P (class))
13935 switch (GET_MODE_SIZE (mode
))
13946 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
13948 switch (GET_MODE_SIZE (mode
))
13952 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
13953 : ix86_cost
->movzbl_load
);
13955 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
13956 : ix86_cost
->int_store
[0] + 4);
13959 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
13961 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13962 if (mode
== TFmode
)
13964 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
13965 * ((int) GET_MODE_SIZE (mode
)
13966 + UNITS_PER_WORD
-1 ) / UNITS_PER_WORD
);
13970 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13972 ix86_svr3_asm_out_constructor (symbol
, priority
)
13974 int priority ATTRIBUTE_UNUSED
;
13977 fputs ("\tpushl $", asm_out_file
);
13978 assemble_name (asm_out_file
, XSTR (symbol
, 0));
13979 fputc ('\n', asm_out_file
);
#ifdef TARGET_MACHO
/* Counter used to generate unique stub labels (LPC$n / Ln$lz).  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Pick the stub section: PIC stubs compute addresses relative to a
     fetched PC; non-PIC stubs can use absolute addresses.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* Materialize the PC in %eax, then jump through the lazy
	 pointer PC-relatively.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  /* The binder pushes the address of the lazy pointer and transfers
     to dyld_stub_binding_helper, which resolves the symbol and
     patches the lazy pointer for subsequent calls.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initially pointing at the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14048 /* Order the registers for register allocator. */
14051 x86_order_regs_for_local_alloc ()
14056 /* First allocate the local general purpose registers. */
14057 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14058 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
14059 reg_alloc_order
[pos
++] = i
;
14061 /* Global general purpose registers. */
14062 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14063 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
14064 reg_alloc_order
[pos
++] = i
;
14066 /* x87 registers come first in case we are doing FP math
14068 if (!TARGET_SSE_MATH
)
14069 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14070 reg_alloc_order
[pos
++] = i
;
14072 /* SSE registers. */
14073 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
14074 reg_alloc_order
[pos
++] = i
;
14075 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
14076 reg_alloc_order
[pos
++] = i
;
14078 /* x87 registerts. */
14079 if (TARGET_SSE_MATH
)
14080 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14081 reg_alloc_order
[pos
++] = i
;
14083 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
14084 reg_alloc_order
[pos
++] = i
;
14086 /* Initialize the rest of array as we do not allocate some registers
14088 while (pos
< FIRST_PSEUDO_REGISTER
)
14089 reg_alloc_order
[pos
++] = 0;
14092 /* Returns an expression indicating where the this parameter is
14093 located on entry to the FUNCTION. */
14096 x86_this_parameter (function
)
14099 tree type
= TREE_TYPE (function
);
14103 int n
= aggregate_value_p (TREE_TYPE (type
)) != 0;
14104 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
14107 if (ix86_fntype_regparm (type
) > 0)
14111 parm
= TYPE_ARG_TYPES (type
);
14112 /* Figure out whether or not the function has a variable number of
14114 for (; parm
; parm
= TREE_CHAIN (parm
))
14115 if (TREE_VALUE (parm
) == void_type_node
)
14117 /* If not, the this parameter is in %eax. */
14119 return gen_rtx_REG (SImode
, 0);
14122 if (aggregate_value_p (TREE_TYPE (type
)))
14123 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
14125 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
14128 /* Determine whether x86_output_mi_thunk can succeed. */
14131 x86_can_output_mi_thunk (thunk
, delta
, vcall_offset
, function
)
14132 tree thunk ATTRIBUTE_UNUSED
;
14133 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
;
14134 HOST_WIDE_INT vcall_offset
;
14137 /* 64-bit can handle anything. */
14141 /* For 32-bit, everything's fine if we have one free register. */
14142 if (ix86_fntype_regparm (TREE_TYPE (function
)) < 3)
14145 /* Need a free register for vcall_offset. */
14149 /* Need a free register for GOT references. */
14150 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
14153 /* Otherwise ok. */
14157 /* Output the assembler code for a thunk function. THUNK_DECL is the
14158 declaration for the thunk function itself, FUNCTION is the decl for
14159 the target function. DELTA is an immediate constant offset to be
14160 added to THIS. If VCALL_OFFSET is non-zero, the word at
14161 *(*this + vcall_offset) should be added to THIS. */
14164 x86_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
)
14165 FILE *file ATTRIBUTE_UNUSED
;
14166 tree thunk ATTRIBUTE_UNUSED
;
14167 HOST_WIDE_INT delta
;
14168 HOST_WIDE_INT vcall_offset
;
14172 rtx
this = x86_this_parameter (function
);
14175 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14176 pull it in now and let DELTA benefit. */
14179 else if (vcall_offset
)
14181 /* Put the this parameter into %eax. */
14183 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
14184 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14187 this_reg
= NULL_RTX
;
14189 /* Adjust the this parameter by a fixed constant. */
14192 xops
[0] = GEN_INT (delta
);
14193 xops
[1] = this_reg
? this_reg
: this;
14196 if (!x86_64_general_operand (xops
[0], DImode
))
14198 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14200 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
14204 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14207 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14210 /* Adjust the this parameter by a value stored in the vtable. */
14214 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14216 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14218 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
14221 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14223 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14225 /* Adjust the this parameter. */
14226 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
14227 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
14229 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
14230 xops
[0] = GEN_INT (vcall_offset
);
14232 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14233 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
14235 xops
[1] = this_reg
;
14237 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14239 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14242 /* If necessary, drop THIS back to its stack slot. */
14243 if (this_reg
&& this_reg
!= this)
14245 xops
[0] = this_reg
;
14247 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14250 xops
[0] = DECL_RTL (function
);
14253 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14254 output_asm_insn ("jmp\t%P0", xops
);
14257 tmp
= XEXP (xops
[0], 0);
14258 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, tmp
), UNSPEC_GOTPCREL
);
14259 tmp
= gen_rtx_CONST (Pmode
, tmp
);
14260 tmp
= gen_rtx_MEM (QImode
, tmp
);
14262 output_asm_insn ("jmp\t%A0", xops
);
14267 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14268 output_asm_insn ("jmp\t%P0", xops
);
14271 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14272 output_set_got (tmp
);
14275 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
14276 output_asm_insn ("jmp\t{*}%1", xops
);
14282 x86_field_alignment (field
, computed
)
14286 enum machine_mode mode
;
14287 tree type
= TREE_TYPE (field
);
14289 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
14291 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
14292 ? get_inner_array_type (type
) : type
);
14293 if (mode
== DFmode
|| mode
== DCmode
14294 || GET_MODE_CLASS (mode
) == MODE_INT
14295 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
14296 return MIN (32, computed
);
14300 /* Output assembler code to FILE to increment profiler label # LABELNO
14301 for profiling a function entry. */
14303 x86_function_profiler (file
, labelno
)
14310 #ifndef NO_PROFILE_COUNTERS
14311 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
14313 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
14317 #ifndef NO_PROFILE_COUNTERS
14318 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
14320 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14324 #ifndef NO_PROFILE_COUNTERS
14325 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14326 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
14328 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
14332 #ifndef NO_PROFILE_COUNTERS
14333 fprintf (file
, "\tmovl\t$%sP%d,%%$s\n", LPREFIX
, labelno
,
14334 PROFILE_COUNT_REGISTER
);
14336 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14340 /* Implement machine specific optimizations.
14341 At the moment we implement single transformation: AMD Athlon works faster
14342 when RET is not destination of conditional jump or directly preceeded
14343 by other jump instruction. We avoid the penalty by inserting NOP just
14344 before the RET instructions in such cases. */
14346 x86_machine_dependent_reorg (first
)
14347 rtx first ATTRIBUTE_UNUSED
;
14351 if (!TARGET_ATHLON
|| !optimize
|| optimize_size
)
14353 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
14355 basic_block bb
= e
->src
;
14358 bool insert
= false;
14360 if (!returnjump_p (ret
) || !maybe_hot_bb_p (bb
))
14362 prev
= prev_nonnote_insn (ret
);
14363 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
14366 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
14367 if (EDGE_FREQUENCY (e
) && e
->src
->index
> 0
14368 && !(e
->flags
& EDGE_FALLTHRU
))
14373 prev
= prev_real_insn (ret
);
14374 if (prev
&& GET_CODE (prev
) == JUMP_INSN
14375 && any_condjump_p (prev
))
14379 emit_insn_before (gen_nop (), ret
);
14383 #include "gt-i386.h"