1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
/* Default stack-probe limit when the target headers do not provide one.
   -1 means "no limit".  The original #ifndef was left unterminated in
   this copy; restore the matching #endif.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost
= { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
95 /* Processor costs (relative to an add) */
97 struct processor_costs i386_cost
= { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
140 struct processor_costs i486_cost
= { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
183 struct processor_costs pentium_cost
= {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
226 struct processor_costs pentiumpro_cost
= {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
269 struct processor_costs k6_cost
= {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
312 struct processor_costs athlon_cost
= {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
355 struct processor_costs pentium4_cost
= {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
397 const struct processor_costs
*ix86_cost
= &pentium_cost
;
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
408 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
409 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
410 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
411 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
412 const int x86_double_with_add
= ~m_386
;
413 const int x86_use_bit_test
= m_386
;
414 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
415 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
416 const int x86_3dnow_a
= m_ATHLON
;
417 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
418 const int x86_branch_hints
= m_PENT4
;
419 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
420 const int x86_partial_reg_stall
= m_PPRO
;
421 const int x86_use_loop
= m_K6
;
422 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
423 const int x86_use_mov0
= m_K6
;
424 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
425 const int x86_read_modify_write
= ~m_PENT
;
426 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
427 const int x86_split_long_moves
= m_PPRO
;
428 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON
;
429 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
430 const int x86_single_stringop
= m_386
| m_PENT4
;
431 const int x86_qimode_math
= ~(0);
432 const int x86_promote_qi_regs
= 0;
433 const int x86_himode_math
= ~(m_PPRO
);
434 const int x86_promote_hi_regs
= m_PPRO
;
435 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
436 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
437 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
438 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
439 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
| m_PPRO
);
440 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
441 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
442 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
443 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
444 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
445 const int x86_decompose_lea
= m_PENT4
;
446 const int x86_shift1
= ~m_486
;
447 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON
| m_PENT4
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  (NOTE(review): original comment was truncated in this
   copy; wording of the tail reconstructed -- confirm.)  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   which prologue/epilogue style is in use (see note above about
   truncated comments).  */
static int use_fast_prologue_epilogue;
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
460 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
461 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): this initializer's opening/closing braces -- and
   apparently some interior entries (e.g. an arg-pointer row and the tails
   of the SSE/MMX rows, which show only six entries each) -- were lost in
   this copy.  Restore from the original before compiling; do not pad
   blindly, as entry order must match FIRST_PSEUDO_REGISTER layout.  */
466 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
/* ax, dx, cx, bx (from the class names AREG/DREG/CREG/BREG) */
469 AREG
, DREG
, CREG
, BREG
,
/* si, di, then presumably bp, sp -- confirm against register order */
471 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
/* FP stack registers */
473 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
474 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
479 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
481 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
/* extended (REX) integer registers */
483 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
484 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
/* extended SSE registers */
485 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
489 /* The "default" register map used in 32bit mode. */
491 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
502 static int const x86_64_int_parameter_registers
[6] =
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
508 static int const x86_64_int_return_registers
[4] =
510 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
579 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
593 rtx ix86_compare_op0
= NULL_RTX
;
594 rtx ix86_compare_op1
= NULL_RTX
;
/* The encoding characters for the four TLS models present in ELF
   (index 0 is a placeholder for "no TLS model").  */
static char const tls_model_chars[] = " GLil";

/* Maximum number of per-mode stack slots cached in struct machine_function.  */
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function
GTY(())
607 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
608 const char *some_ld_name
;
609 int save_varrargs_registers
;
610 int accesses_prev_frame
;
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* NOTE(review): the head of this definition (the "struct ix86_frame {"
   line and several leading fields) and the closing "};" were lost in this
   copy, along with most of the stack-layout diagram in the comment below.
   Restore from the original before compiling.  */
616 /* Structure describing stack frame layout.
617 Stack grows downward:
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
630 > to_allocate <- FRAME_POINTER
642 int outgoing_arguments_size
;
645 HOST_WIDE_INT to_allocate
;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset
;
648 HOST_WIDE_INT hard_frame_pointer_offset
;
649 HOST_WIDE_INT stack_pointer_offset
;
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string
;
657 enum cmodel ix86_cmodel
;
659 const char *ix86_asm_string
;
660 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
662 const char *ix86_tls_dialect_string
;
663 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath
;
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu
;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch
;
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string
; /* for -march=<xxx> */
676 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string
;
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse
;
684 /* ix86_regparm_string as a number */
687 /* Alignment to use for loops and jumps: */
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string
;
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string
;
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string
;
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary
;
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost
;
703 const char *ix86_branch_cost_string
;
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string
;
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix
[16];
710 static int internal_label_prefix_len
;
712 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
713 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
714 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
715 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
717 static const char *get_some_local_dynamic_name
PARAMS ((void));
718 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
719 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
720 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
721 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
723 static rtx get_thread_pointer
PARAMS ((void));
724 static void get_pc_thunk_name
PARAMS ((char [32], unsigned int));
725 static rtx gen_push
PARAMS ((rtx
));
726 static int memory_address_length
PARAMS ((rtx addr
));
727 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
728 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
729 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
730 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
731 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
732 static struct machine_function
* ix86_init_machine_status
PARAMS ((void));
733 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
734 static int ix86_nsaved_regs
PARAMS ((void));
735 static void ix86_emit_save_regs
PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
737 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
738 static void ix86_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
739 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
740 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
741 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
742 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
743 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
744 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
745 static int ix86_issue_rate
PARAMS ((void));
746 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
747 static void ix86_sched_init
PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
749 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
750 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
753 static rtx x86_this_parameter
PARAMS ((tree
));
754 static void x86_output_mi_thunk
PARAMS ((FILE *, tree
, HOST_WIDE_INT
,
755 HOST_WIDE_INT
, tree
));
756 static bool x86_can_output_mi_thunk
PARAMS ((tree
, HOST_WIDE_INT
,
757 HOST_WIDE_INT
, tree
));
761 rtx base
, index
, disp
;
765 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
766 static bool ix86_cannot_force_const_mem
PARAMS ((rtx
));
768 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
769 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
772 struct builtin_description
;
773 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
775 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
777 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
778 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
779 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
780 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
781 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
782 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
783 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
787 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
789 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
790 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
791 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
792 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
793 static unsigned int ix86_select_alt_pic_regnum
PARAMS ((void));
794 static int ix86_save_reg
PARAMS ((unsigned int, int));
795 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
796 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
797 static int ix86_fntype_regparm
PARAMS ((tree
));
798 const struct attribute_spec ix86_attribute_table
[];
799 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
800 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
801 static int ix86_value_regno
PARAMS ((enum machine_mode
));
/* SVR3-style constructor output is only needed when the target defines
   both macros.  The #if was left unterminated in this copy; restore the
   matching #endif.  */
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif
807 /* Register class used for passing given 64bit part of the argument.
808 These represent classes as documented by the PS ABI, with the exception
809 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
810 use SF or DFmode move instead of DImode to avoid reformatting penalties.
812 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
813 whenever possible (upper half does contain padding).
815 enum x86_64_reg_class
818 X86_64_INTEGER_CLASS
,
819 X86_64_INTEGERSI_CLASS
,
/* Human-readable names for the x86_64_reg_class values, used for
   debugging output; indices parallel the enum's declaration order.  */
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF",
  "sseDF", "sseup", "x87", "x87up", "no"
};
831 #define MAX_CLASSES 4
832 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
833 enum x86_64_reg_class
[MAX_CLASSES
],
835 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
837 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
839 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
840 enum x86_64_reg_class
));
842 /* Initialize the GCC target structure. */
843 #undef TARGET_ATTRIBUTE_TABLE
844 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
/* Targets with dllimport/dllexport attributes override the default
   decl-attribute merging hook.  The #ifdef was left unterminated in this
   copy; restore the matching #endif.  */
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
850 #undef TARGET_COMP_TYPE_ATTRIBUTES
851 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
853 #undef TARGET_INIT_BUILTINS
854 #define TARGET_INIT_BUILTINS ix86_init_builtins
856 #undef TARGET_EXPAND_BUILTIN
857 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
859 #undef TARGET_ASM_FUNCTION_EPILOGUE
860 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
862 #undef TARGET_ASM_OPEN_PAREN
863 #define TARGET_ASM_OPEN_PAREN ""
864 #undef TARGET_ASM_CLOSE_PAREN
865 #define TARGET_ASM_CLOSE_PAREN ""
867 #undef TARGET_ASM_ALIGNED_HI_OP
868 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
869 #undef TARGET_ASM_ALIGNED_SI_OP
870 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
872 #undef TARGET_ASM_ALIGNED_DI_OP
873 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
876 #undef TARGET_ASM_UNALIGNED_HI_OP
877 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
878 #undef TARGET_ASM_UNALIGNED_SI_OP
879 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
880 #undef TARGET_ASM_UNALIGNED_DI_OP
881 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
883 #undef TARGET_SCHED_ADJUST_COST
884 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
885 #undef TARGET_SCHED_ISSUE_RATE
886 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
887 #undef TARGET_SCHED_VARIABLE_ISSUE
888 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
889 #undef TARGET_SCHED_INIT
890 #define TARGET_SCHED_INIT ix86_sched_init
891 #undef TARGET_SCHED_REORDER
892 #define TARGET_SCHED_REORDER ix86_sched_reorder
893 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
894 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
895 ia32_use_dfa_pipeline_interface
896 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
897 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
898 ia32_multipass_dfa_lookahead
901 #undef TARGET_HAVE_TLS
902 #define TARGET_HAVE_TLS true
904 #undef TARGET_CANNOT_FORCE_CONST_MEM
905 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
907 #undef TARGET_ASM_OUTPUT_MI_THUNK
908 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
909 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
910 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
912 struct gcc_target targetm
= TARGET_INITIALIZER
;
914 /* Sometimes certain combinations of command options do not make
915 sense on a particular target machine. You can define a macro
916 `OVERRIDE_OPTIONS' to take account of this. This macro, if
917 defined, is executed once just after all the command options have
920 Don't use this macro to turn on various extra optimizations for
921 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
927 /* Comes from final.c -- no real reason to change it. */
928 #define MAX_CODE_ALIGN 16
932 const struct processor_costs
*cost
; /* Processor costs */
933 const int target_enable
; /* Target flags to enable. */
934 const int target_disable
; /* Target flags to disable. */
935 const int align_loop
; /* Default alignments. */
936 const int align_loop_max_skip
;
937 const int align_jump
;
938 const int align_jump_max_skip
;
939 const int align_func
;
940 const int branch_cost
;
942 const processor_target_table
[PROCESSOR_max
] =
944 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4, 1},
945 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16, 1},
946 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16, 1},
947 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16, 1},
948 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32, 1},
949 {&athlon_cost
, 0, 0, 16, 7, 64, 7, 16, 1},
950 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0, 1}
953 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
956 const char *const name
; /* processor name or nickname. */
957 const enum processor_type processor
;
963 PTA_PREFETCH_SSE
= 8,
968 const processor_alias_table
[] =
970 {"i386", PROCESSOR_I386
, 0},
971 {"i486", PROCESSOR_I486
, 0},
972 {"i586", PROCESSOR_PENTIUM
, 0},
973 {"pentium", PROCESSOR_PENTIUM
, 0},
974 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
975 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
976 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
977 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
978 {"i686", PROCESSOR_PENTIUMPRO
, 0},
979 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
980 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
981 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
982 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
983 PTA_MMX
| PTA_PREFETCH_SSE
},
984 {"k6", PROCESSOR_K6
, PTA_MMX
},
985 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
986 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
987 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
989 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
990 | PTA_3DNOW
| PTA_3DNOW_A
},
991 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
992 | PTA_3DNOW_A
| PTA_SSE
},
993 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
994 | PTA_3DNOW_A
| PTA_SSE
},
995 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
996 | PTA_3DNOW_A
| PTA_SSE
},
999 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1001 /* By default our XFmode is the 80-bit extended format. If we have
1002 use TFmode instead, it's also the 80-bit format, but with padding. */
1003 real_format_for_mode
[XFmode
- QFmode
] = &ieee_extended_intel_96_format
;
1004 real_format_for_mode
[TFmode
- QFmode
] = &ieee_extended_intel_128_format
;
1006 /* Set the default values for switches whose default depends on TARGET_64BIT
1007    in case they weren't overwritten by command line options.  */
1010 if (flag_omit_frame_pointer
== 2)
1011 flag_omit_frame_pointer
= 1;
1012 if (flag_asynchronous_unwind_tables
== 2)
1013 flag_asynchronous_unwind_tables
= 1;
1014 if (flag_pcc_struct_return
== 2)
1015 flag_pcc_struct_return
= 0;
1019 if (flag_omit_frame_pointer
== 2)
1020 flag_omit_frame_pointer
= 0;
1021 if (flag_asynchronous_unwind_tables
== 2)
1022 flag_asynchronous_unwind_tables
= 0;
1023 if (flag_pcc_struct_return
== 2)
1024 flag_pcc_struct_return
= 1;
1027 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1028 SUBTARGET_OVERRIDE_OPTIONS
;
1031 if (!ix86_cpu_string
&& ix86_arch_string
)
1032 ix86_cpu_string
= ix86_arch_string
;
1033 if (!ix86_cpu_string
)
1034 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1035 if (!ix86_arch_string
)
1036 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
1038 if (ix86_cmodel_string
!= 0)
1040 if (!strcmp (ix86_cmodel_string
, "small"))
1041 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1043 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1044 else if (!strcmp (ix86_cmodel_string
, "32"))
1045 ix86_cmodel
= CM_32
;
1046 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1047 ix86_cmodel
= CM_KERNEL
;
1048 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1049 ix86_cmodel
= CM_MEDIUM
;
1050 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1051 ix86_cmodel
= CM_LARGE
;
1053 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1057 ix86_cmodel
= CM_32
;
1059 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1061 if (ix86_asm_string
!= 0)
1063 if (!strcmp (ix86_asm_string
, "intel"))
1064 ix86_asm_dialect
= ASM_INTEL
;
1065 else if (!strcmp (ix86_asm_string
, "att"))
1066 ix86_asm_dialect
= ASM_ATT
;
1068 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1070 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1071 error ("code model `%s' not supported in the %s bit mode",
1072 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1073 if (ix86_cmodel
== CM_LARGE
)
1074 sorry ("code model `large' not supported yet");
1075 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1076 sorry ("%i-bit mode not compiled in",
1077 (target_flags
& MASK_64BIT
) ? 64 : 32);
1079 for (i
= 0; i
< pta_size
; i
++)
1080 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1082 ix86_arch
= processor_alias_table
[i
].processor
;
1083 /* Default cpu tuning to the architecture. */
1084 ix86_cpu
= ix86_arch
;
1085 if (processor_alias_table
[i
].flags
& PTA_MMX
1086 && !(target_flags_explicit
& MASK_MMX
))
1087 target_flags
|= MASK_MMX
;
1088 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1089 && !(target_flags_explicit
& MASK_3DNOW
))
1090 target_flags
|= MASK_3DNOW
;
1091 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1092 && !(target_flags_explicit
& MASK_3DNOW_A
))
1093 target_flags
|= MASK_3DNOW_A
;
1094 if (processor_alias_table
[i
].flags
& PTA_SSE
1095 && !(target_flags_explicit
& MASK_SSE
))
1096 target_flags
|= MASK_SSE
;
1097 if (processor_alias_table
[i
].flags
& PTA_SSE2
1098 && !(target_flags_explicit
& MASK_SSE2
))
1099 target_flags
|= MASK_SSE2
;
1100 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1101 x86_prefetch_sse
= true;
1106 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1108 for (i
= 0; i
< pta_size
; i
++)
1109 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1111 ix86_cpu
= processor_alias_table
[i
].processor
;
1114 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1115 x86_prefetch_sse
= true;
1117 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1120 ix86_cost
= &size_cost
;
1122 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1123 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1124 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1126 /* Arrange to set up i386_stack_locals for all functions. */
1127 init_machine_status
= ix86_init_machine_status
;
1129 /* Validate -mregparm= value. */
1130 if (ix86_regparm_string
)
1132 i
= atoi (ix86_regparm_string
);
1133 if (i
< 0 || i
> REGPARM_MAX
)
1134 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1140 ix86_regparm
= REGPARM_MAX
;
1142 /* If the user has provided any of the -malign-* options,
1143 warn and use that value only if -falign-* is not set.
1144 Remove this code in GCC 3.2 or later. */
1145 if (ix86_align_loops_string
)
1147 warning ("-malign-loops is obsolete, use -falign-loops");
1148 if (align_loops
== 0)
1150 i
= atoi (ix86_align_loops_string
);
1151 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1152 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1154 align_loops
= 1 << i
;
1158 if (ix86_align_jumps_string
)
1160 warning ("-malign-jumps is obsolete, use -falign-jumps");
1161 if (align_jumps
== 0)
1163 i
= atoi (ix86_align_jumps_string
);
1164 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1165 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1167 align_jumps
= 1 << i
;
1171 if (ix86_align_funcs_string
)
1173 warning ("-malign-functions is obsolete, use -falign-functions");
1174 if (align_functions
== 0)
1176 i
= atoi (ix86_align_funcs_string
);
1177 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1178 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1180 align_functions
= 1 << i
;
1184 /* Default align_* from the processor table. */
1185 if (align_loops
== 0)
1187 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1188 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1190 if (align_jumps
== 0)
1192 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1193 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1195 if (align_functions
== 0)
1197 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1200 /* Validate -mpreferred-stack-boundary= value, or provide default.
1201 The default of 128 bits is for Pentium III's SSE __m128, but we
1202 don't want additional code to keep the stack aligned when
1203 optimizing for code size. */
1204 ix86_preferred_stack_boundary
= (optimize_size
1205 ? TARGET_64BIT
? 128 : 32
1207 if (ix86_preferred_stack_boundary_string
)
1209 i
= atoi (ix86_preferred_stack_boundary_string
);
1210 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1211 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1212 TARGET_64BIT
? 4 : 2);
1214 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1217 /* Validate -mbranch-cost= value, or provide default. */
1218 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1219 if (ix86_branch_cost_string
)
1221 i
= atoi (ix86_branch_cost_string
);
1223 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1225 ix86_branch_cost
= i
;
1228 if (ix86_tls_dialect_string
)
1230 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1231 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1232 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1233 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1235 error ("bad value (%s) for -mtls-dialect= switch",
1236 ix86_tls_dialect_string
);
1239 /* Keep nonleaf frame pointers. */
1240 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1241 flag_omit_frame_pointer
= 1;
1243 /* If we're doing fast math, we don't care about comparison order
1244 wrt NaNs. This lets us use a shorter comparison sequence. */
1245 if (flag_unsafe_math_optimizations
)
1246 target_flags
&= ~MASK_IEEE_FP
;
1248 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1249 since the insns won't need emulation. */
1250 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1251 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1255 if (TARGET_ALIGN_DOUBLE
)
1256 error ("-malign-double makes no sense in the 64bit mode");
1258 error ("-mrtd calling convention not supported in the 64bit mode");
1259 /* Enable by default the SSE and MMX builtins. */
1260 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1261 ix86_fpmath
= FPMATH_SSE
;
1264 ix86_fpmath
= FPMATH_387
;
1266 if (ix86_fpmath_string
!= 0)
1268 if (! strcmp (ix86_fpmath_string
, "387"))
1269 ix86_fpmath
= FPMATH_387
;
1270 else if (! strcmp (ix86_fpmath_string
, "sse"))
1274 warning ("SSE instruction set disabled, using 387 arithmetics");
1275 ix86_fpmath
= FPMATH_387
;
1278 ix86_fpmath
= FPMATH_SSE
;
1280 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1281 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1285 warning ("SSE instruction set disabled, using 387 arithmetics");
1286 ix86_fpmath
= FPMATH_387
;
1288 else if (!TARGET_80387
)
1290 warning ("387 instruction set disabled, using SSE arithmetics");
1291 ix86_fpmath
= FPMATH_SSE
;
1294 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1297 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1300 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1304 target_flags
|= MASK_MMX
;
1305 x86_prefetch_sse
= true;
1308 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1311 target_flags
|= MASK_MMX
;
1312 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1313 extensions it adds. */
1314 if (x86_3dnow_a
& (1 << ix86_arch
))
1315 target_flags
|= MASK_3DNOW_A
;
1317 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1318 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1320 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1322 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1325 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1326 p
= strchr (internal_label_prefix
, 'X');
1327 internal_label_prefix_len
= p
- internal_label_prefix
;
1333 optimization_options (level
, size
)
1335 int size ATTRIBUTE_UNUSED
;
1337 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1338 make the problem with not enough registers even worse. */
1339 #ifdef INSN_SCHEDULING
1341 flag_schedule_insns
= 0;
1344 /* The default values of these switches depend on the TARGET_64BIT
1345 that is not known at this moment. Mark these values with 2 and
1346 let user the to override these. In case there is no command line option
1347 specifying them, we will set the defaults in override_options. */
1349 flag_omit_frame_pointer
= 2;
1350 flag_pcc_struct_return
= 2;
1351 flag_asynchronous_unwind_tables
= 2;
1354 /* Table of valid machine attributes. */
1355 const struct attribute_spec ix86_attribute_table
[] =
1357 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1358 /* Stdcall attribute says callee is responsible for popping arguments
1359 if they are not variable. */
1360 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1361 /* Cdecl attribute says the callee is a normal C declaration */
1362 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1363 /* Regparm attribute specifies how many integer arguments are to be
1364 passed in registers. */
1365 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1366 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1367 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1368 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1369 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1371 { NULL
, 0, 0, false, false, false, NULL
}
1374 /* Handle a "cdecl" or "stdcall" attribute;
1375 arguments as in struct attribute_spec.handler. */
1377 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1380 tree args ATTRIBUTE_UNUSED
;
1381 int flags ATTRIBUTE_UNUSED
;
1384 if (TREE_CODE (*node
) != FUNCTION_TYPE
1385 && TREE_CODE (*node
) != METHOD_TYPE
1386 && TREE_CODE (*node
) != FIELD_DECL
1387 && TREE_CODE (*node
) != TYPE_DECL
)
1389 warning ("`%s' attribute only applies to functions",
1390 IDENTIFIER_POINTER (name
));
1391 *no_add_attrs
= true;
1396 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1397 *no_add_attrs
= true;
1403 /* Handle a "regparm" attribute;
1404 arguments as in struct attribute_spec.handler. */
1406 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1410 int flags ATTRIBUTE_UNUSED
;
1413 if (TREE_CODE (*node
) != FUNCTION_TYPE
1414 && TREE_CODE (*node
) != METHOD_TYPE
1415 && TREE_CODE (*node
) != FIELD_DECL
1416 && TREE_CODE (*node
) != TYPE_DECL
)
1418 warning ("`%s' attribute only applies to functions",
1419 IDENTIFIER_POINTER (name
));
1420 *no_add_attrs
= true;
1426 cst
= TREE_VALUE (args
);
1427 if (TREE_CODE (cst
) != INTEGER_CST
)
1429 warning ("`%s' attribute requires an integer constant argument",
1430 IDENTIFIER_POINTER (name
));
1431 *no_add_attrs
= true;
1433 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1435 warning ("argument to `%s' attribute larger than %d",
1436 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1437 *no_add_attrs
= true;
1444 /* Return 0 if the attributes for two types are incompatible, 1 if they
1445 are compatible, and 2 if they are nearly compatible (which causes a
1446 warning to be generated). */
1449 ix86_comp_type_attributes (type1
, type2
)
1453 /* Check for mismatch of non-default calling convention. */
1454 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1456 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1459 /* Check for mismatched return types (cdecl vs stdcall). */
1460 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1461 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1466 /* Return the regparm value for a fuctio with the indicated TYPE. */
1469 ix86_fntype_regparm (type
)
1474 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1476 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1478 return ix86_regparm
;
1481 /* Value is the number of bytes of arguments automatically
1482 popped when returning from a subroutine call.
1483 FUNDECL is the declaration node of the function (as a tree),
1484 FUNTYPE is the data type of the function (as a tree),
1485 or for a library call it is an identifier node for the subroutine name.
1486 SIZE is the number of bytes of arguments passed on the stack.
1488 On the 80386, the RTD insn may be used to pop them if the number
1489 of args is fixed, but if the number is variable then the caller
1490 must pop them all. RTD can't be used for library calls now
1491 because the library is compiled with the Unix compiler.
1492 Use of RTD is a selectable option, since it is incompatible with
1493 standard Unix calling sequences. If the option is not selected,
1494 the caller must always pop the args.
1496 The attribute stdcall is equivalent to RTD on a per module basis. */
1499 ix86_return_pops_args (fundecl
, funtype
, size
)
1504 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1506 /* Cdecl functions override -mrtd, and never pop the stack. */
1507 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1509 /* Stdcall functions will pop the stack if not variable args. */
1510 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1514 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1515 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1516 == void_type_node
)))
1520 /* Lose any fake structure return argument if it is passed on the stack. */
1521 if (aggregate_value_p (TREE_TYPE (funtype
))
1524 int nregs
= ix86_fntype_regparm (funtype
);
1527 return GET_MODE_SIZE (Pmode
);
1533 /* Argument support functions. */
1535 /* Return true when register may be used to pass function parameters. */
1537 ix86_function_arg_regno_p (regno
)
1542 return (regno
< REGPARM_MAX
1543 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1544 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1546 /* RAX is used as hidden argument to va_arg functions. */
1549 for (i
= 0; i
< REGPARM_MAX
; i
++)
1550 if (regno
== x86_64_int_parameter_registers
[i
])
1555 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1556 for a call to a function whose data type is FNTYPE.
1557 For a library call, FNTYPE is 0. */
1560 init_cumulative_args (cum
, fntype
, libname
)
1561 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1562 tree fntype
; /* tree ptr for function decl */
1563 rtx libname
; /* SYMBOL_REF of library name or 0 */
1565 static CUMULATIVE_ARGS zero_cum
;
1566 tree param
, next_param
;
1568 if (TARGET_DEBUG_ARG
)
1570 fprintf (stderr
, "\ninit_cumulative_args (");
1572 fprintf (stderr
, "fntype code = %s, ret code = %s",
1573 tree_code_name
[(int) TREE_CODE (fntype
)],
1574 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1576 fprintf (stderr
, "no fntype");
1579 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1584 /* Set up the number of registers to use for passing arguments. */
1585 cum
->nregs
= ix86_regparm
;
1586 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1587 if (fntype
&& !TARGET_64BIT
)
1589 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1592 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1594 cum
->maybe_vaarg
= false;
1596 /* Determine if this function has variable arguments. This is
1597 indicated by the last argument being 'void_type_mode' if there
1598 are no variable arguments. If there are variable arguments, then
1599 we won't pass anything in registers */
1603 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1604 param
!= 0; param
= next_param
)
1606 next_param
= TREE_CHAIN (param
);
1607 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1611 cum
->maybe_vaarg
= true;
1615 if ((!fntype
&& !libname
)
1616 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1617 cum
->maybe_vaarg
= 1;
1619 if (TARGET_DEBUG_ARG
)
1620 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1625 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
1626    of this code is to classify each 8 bytes of an incoming argument by the register
1627    class and assign registers accordingly.  */
1629 /* Return the union class of CLASS1 and CLASS2.
1630 See the x86-64 PS ABI for details. */
1632 static enum x86_64_reg_class
1633 merge_classes (class1
, class2
)
1634 enum x86_64_reg_class class1
, class2
;
1636 /* Rule #1: If both classes are equal, this is the resulting class. */
1637 if (class1
== class2
)
1640 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1642 if (class1
== X86_64_NO_CLASS
)
1644 if (class2
== X86_64_NO_CLASS
)
1647 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1648 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1649 return X86_64_MEMORY_CLASS
;
1651 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1652 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1653 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1654 return X86_64_INTEGERSI_CLASS
;
1655 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1656 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1657 return X86_64_INTEGER_CLASS
;
1659 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1660 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1661 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1662 return X86_64_MEMORY_CLASS
;
1664 /* Rule #6: Otherwise class SSE is used. */
1665 return X86_64_SSE_CLASS
;
1668 /* Classify the argument of type TYPE and mode MODE.
1669 CLASSES will be filled by the register class used to pass each word
1670 of the operand. The number of words is returned. In case the parameter
1671 should be passed in memory, 0 is returned. As a special case for zero
1672 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1674 BIT_OFFSET is used internally for handling records and specifies offset
1675 of the offset in bits modulo 256 to avoid overflow cases.
1677 See the x86-64 PS ABI for details.
1681 classify_argument (mode
, type
, classes
, bit_offset
)
1682 enum machine_mode mode
;
1684 enum x86_64_reg_class classes
[MAX_CLASSES
];
1688 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1689 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1691 /* Variable sized entities are always passed/returned in memory. */
1695 if (type
&& AGGREGATE_TYPE_P (type
))
1699 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1701 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1705 for (i
= 0; i
< words
; i
++)
1706 classes
[i
] = X86_64_NO_CLASS
;
1708 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1709 signalize memory class, so handle it as special case. */
1712 classes
[0] = X86_64_NO_CLASS
;
1716 /* Classify each field of record and merge classes. */
1717 if (TREE_CODE (type
) == RECORD_TYPE
)
1719 /* For classes first merge in the field of the subclasses. */
1720 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1722 tree bases
= TYPE_BINFO_BASETYPES (type
);
1723 int n_bases
= TREE_VEC_LENGTH (bases
);
1726 for (i
= 0; i
< n_bases
; ++i
)
1728 tree binfo
= TREE_VEC_ELT (bases
, i
);
1730 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1731 tree type
= BINFO_TYPE (binfo
);
1733 num
= classify_argument (TYPE_MODE (type
),
1735 (offset
+ bit_offset
) % 256);
1738 for (i
= 0; i
< num
; i
++)
1740 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1742 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1746 /* And now merge the fields of structure. */
1747 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1749 if (TREE_CODE (field
) == FIELD_DECL
)
1753 /* Bitfields are always classified as integer. Handle them
1754 early, since later code would consider them to be
1755 misaligned integers. */
1756 if (DECL_BIT_FIELD (field
))
1758 for (i
= int_bit_position (field
) / 8 / 8;
1759 i
< (int_bit_position (field
)
1760 + tree_low_cst (DECL_SIZE (field
), 0)
1763 merge_classes (X86_64_INTEGER_CLASS
,
1768 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1769 TREE_TYPE (field
), subclasses
,
1770 (int_bit_position (field
)
1771 + bit_offset
) % 256);
1774 for (i
= 0; i
< num
; i
++)
1777 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
1779 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1785 /* Arrays are handled as small records. */
1786 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1789 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1790 TREE_TYPE (type
), subclasses
, bit_offset
);
1794 /* The partial classes are now full classes. */
1795 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1796 subclasses
[0] = X86_64_SSE_CLASS
;
1797 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1798 subclasses
[0] = X86_64_INTEGER_CLASS
;
1800 for (i
= 0; i
< words
; i
++)
1801 classes
[i
] = subclasses
[i
% num
];
1803 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1804 else if (TREE_CODE (type
) == UNION_TYPE
1805 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
1807 /* For classes first merge in the field of the subclasses. */
1808 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1810 tree bases
= TYPE_BINFO_BASETYPES (type
);
1811 int n_bases
= TREE_VEC_LENGTH (bases
);
1814 for (i
= 0; i
< n_bases
; ++i
)
1816 tree binfo
= TREE_VEC_ELT (bases
, i
);
1818 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1819 tree type
= BINFO_TYPE (binfo
);
1821 num
= classify_argument (TYPE_MODE (type
),
1823 (offset
+ (bit_offset
% 64)) % 256);
1826 for (i
= 0; i
< num
; i
++)
1828 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1830 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1834 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1836 if (TREE_CODE (field
) == FIELD_DECL
)
1839 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1840 TREE_TYPE (field
), subclasses
,
1844 for (i
= 0; i
< num
; i
++)
1845 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1852 /* Final merger cleanup. */
1853 for (i
= 0; i
< words
; i
++)
1855 /* If one class is MEMORY, everything should be passed in
1857 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1860 /* The X86_64_SSEUP_CLASS should be always preceded by
1861 X86_64_SSE_CLASS. */
1862 if (classes
[i
] == X86_64_SSEUP_CLASS
1863 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1864 classes
[i
] = X86_64_SSE_CLASS
;
1866 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1867 if (classes
[i
] == X86_64_X87UP_CLASS
1868 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1869 classes
[i
] = X86_64_SSE_CLASS
;
1874 /* Compute alignment needed. We align all types to natural boundaries with
1875 exception of XFmode that is aligned to 64bits. */
1876 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1878 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1881 mode_alignment
= 128;
1882 else if (mode
== XCmode
)
1883 mode_alignment
= 256;
1884 /* Misaligned fields are always returned in memory. */
1885 if (bit_offset
% mode_alignment
)
1889 /* Classification of atomic types. */
1899 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1900 classes
[0] = X86_64_INTEGERSI_CLASS
;
1902 classes
[0] = X86_64_INTEGER_CLASS
;
1906 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1909 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1910 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1913 if (!(bit_offset
% 64))
1914 classes
[0] = X86_64_SSESF_CLASS
;
1916 classes
[0] = X86_64_SSE_CLASS
;
1919 classes
[0] = X86_64_SSEDF_CLASS
;
1922 classes
[0] = X86_64_X87_CLASS
;
1923 classes
[1] = X86_64_X87UP_CLASS
;
1926 classes
[0] = X86_64_X87_CLASS
;
1927 classes
[1] = X86_64_X87UP_CLASS
;
1928 classes
[2] = X86_64_X87_CLASS
;
1929 classes
[3] = X86_64_X87UP_CLASS
;
1932 classes
[0] = X86_64_SSEDF_CLASS
;
1933 classes
[1] = X86_64_SSEDF_CLASS
;
1936 classes
[0] = X86_64_SSE_CLASS
;
1944 classes
[0] = X86_64_SSE_CLASS
;
1945 classes
[1] = X86_64_SSEUP_CLASS
;
1960 /* Examine the argument and return set number of register required in each
1961 class. Return 0 iff parameter should be passed in memory. */
1963 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1964 enum machine_mode mode
;
1966 int *int_nregs
, *sse_nregs
;
1969 enum x86_64_reg_class
class[MAX_CLASSES
];
1970 int n
= classify_argument (mode
, type
, class, 0);
1976 for (n
--; n
>= 0; n
--)
1979 case X86_64_INTEGER_CLASS
:
1980 case X86_64_INTEGERSI_CLASS
:
1983 case X86_64_SSE_CLASS
:
1984 case X86_64_SSESF_CLASS
:
1985 case X86_64_SSEDF_CLASS
:
1988 case X86_64_NO_CLASS
:
1989 case X86_64_SSEUP_CLASS
:
1991 case X86_64_X87_CLASS
:
1992 case X86_64_X87UP_CLASS
:
1996 case X86_64_MEMORY_CLASS
:
2001 /* Construct container for the argument used by GCC interface. See
2002 FUNCTION_ARG for the detailed description. */
2004 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
2005 enum machine_mode mode
;
2008 int nintregs
, nsseregs
;
2012 enum machine_mode tmpmode
;
2014 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2015 enum x86_64_reg_class
class[MAX_CLASSES
];
2019 int needed_sseregs
, needed_intregs
;
2020 rtx exp
[MAX_CLASSES
];
2023 n
= classify_argument (mode
, type
, class, 0);
2024 if (TARGET_DEBUG_ARG
)
2027 fprintf (stderr
, "Memory class\n");
2030 fprintf (stderr
, "Classes:");
2031 for (i
= 0; i
< n
; i
++)
2033 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2035 fprintf (stderr
, "\n");
2040 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2042 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2045 /* First construct simple cases. Avoid SCmode, since we want to use
2046 single register to pass this type. */
2047 if (n
== 1 && mode
!= SCmode
)
2050 case X86_64_INTEGER_CLASS
:
2051 case X86_64_INTEGERSI_CLASS
:
2052 return gen_rtx_REG (mode
, intreg
[0]);
2053 case X86_64_SSE_CLASS
:
2054 case X86_64_SSESF_CLASS
:
2055 case X86_64_SSEDF_CLASS
:
2056 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2057 case X86_64_X87_CLASS
:
2058 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2059 case X86_64_NO_CLASS
:
2060 /* Zero sized array, struct or class. */
2065 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
2066 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2068 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2069 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
2070 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2071 && class[1] == X86_64_INTEGER_CLASS
2072 && (mode
== CDImode
|| mode
== TImode
)
2073 && intreg
[0] + 1 == intreg
[1])
2074 return gen_rtx_REG (mode
, intreg
[0]);
2076 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2077 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
2078 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
2080 /* Otherwise figure out the entries of the PARALLEL. */
2081 for (i
= 0; i
< n
; i
++)
2085 case X86_64_NO_CLASS
:
2087 case X86_64_INTEGER_CLASS
:
2088 case X86_64_INTEGERSI_CLASS
:
2089 /* Merge TImodes on aligned occasions here too. */
2090 if (i
* 8 + 8 > bytes
)
2091 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2092 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2096 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2097 if (tmpmode
== BLKmode
)
2099 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2100 gen_rtx_REG (tmpmode
, *intreg
),
2104 case X86_64_SSESF_CLASS
:
2105 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2106 gen_rtx_REG (SFmode
,
2107 SSE_REGNO (sse_regno
)),
2111 case X86_64_SSEDF_CLASS
:
2112 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2113 gen_rtx_REG (DFmode
,
2114 SSE_REGNO (sse_regno
)),
2118 case X86_64_SSE_CLASS
:
2119 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2123 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2124 gen_rtx_REG (tmpmode
,
2125 SSE_REGNO (sse_regno
)),
2127 if (tmpmode
== TImode
)
2135 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2136 for (i
= 0; i
< nexps
; i
++)
2137 XVECEXP (ret
, 0, i
) = exp
[i
];
2141 /* Update the data in CUM to advance over an argument
2142 of mode MODE and data type TYPE.
2143 (TYPE is null for libcalls where that information may not be available.) */
2146 function_arg_advance (cum
, mode
, type
, named
)
2147 CUMULATIVE_ARGS
*cum
; /* current arg information */
2148 enum machine_mode mode
; /* current arg mode */
2149 tree type
; /* type of the argument or 0 if lib support */
2150 int named
; /* whether or not the argument was named */
2153 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2154 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2156 if (TARGET_DEBUG_ARG
)
2158 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2159 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2162 int int_nregs
, sse_nregs
;
2163 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2164 cum
->words
+= words
;
2165 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2167 cum
->nregs
-= int_nregs
;
2168 cum
->sse_nregs
-= sse_nregs
;
2169 cum
->regno
+= int_nregs
;
2170 cum
->sse_regno
+= sse_nregs
;
2173 cum
->words
+= words
;
2177 if (TARGET_SSE
&& mode
== TImode
)
2179 cum
->sse_words
+= words
;
2180 cum
->sse_nregs
-= 1;
2181 cum
->sse_regno
+= 1;
2182 if (cum
->sse_nregs
<= 0)
2190 cum
->words
+= words
;
2191 cum
->nregs
-= words
;
2192 cum
->regno
+= words
;
2194 if (cum
->nregs
<= 0)
2204 /* Define where to put the arguments to a function.
2205 Value is zero to push the argument on the stack,
2206 or a hard register in which to store the argument.
2208 MODE is the argument's machine mode.
2209 TYPE is the data type of the argument (as a tree).
2210 This is null for libcalls where that information may
2212 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2213 the preceding args and about the function being called.
2214 NAMED is nonzero if this argument is a named parameter
2215 (otherwise it is an extra parameter matching an ellipsis). */
2218 function_arg (cum
, mode
, type
, named
)
2219 CUMULATIVE_ARGS
*cum
; /* current arg information */
2220 enum machine_mode mode
; /* current arg mode */
2221 tree type
; /* type of the argument or 0 if lib support */
2222 int named
; /* != 0 for normal args, == 0 for ... args */
2226 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2227 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2229 /* Handle a hidden AL argument containing number of registers for varargs
2230 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2232 if (mode
== VOIDmode
)
2235 return GEN_INT (cum
->maybe_vaarg
2236 ? (cum
->sse_nregs
< 0
2244 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2245 &x86_64_int_parameter_registers
[cum
->regno
],
2250 /* For now, pass fp/complex values on the stack. */
2259 if (words
<= cum
->nregs
)
2260 ret
= gen_rtx_REG (mode
, cum
->regno
);
2264 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2268 if (TARGET_DEBUG_ARG
)
2271 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2272 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2275 print_simple_rtl (stderr
, ret
);
2277 fprintf (stderr
, ", stack");
2279 fprintf (stderr
, " )\n");
2285 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2289 ix86_function_arg_boundary (mode
, type
)
2290 enum machine_mode mode
;
2295 return PARM_BOUNDARY
;
2297 align
= TYPE_ALIGN (type
);
2299 align
= GET_MODE_ALIGNMENT (mode
);
2300 if (align
< PARM_BOUNDARY
)
2301 align
= PARM_BOUNDARY
;
2307 /* Return true if N is a possible register number of function value. */
2309 ix86_function_value_regno_p (regno
)
2314 return ((regno
) == 0
2315 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2316 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2318 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2319 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2320 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2323 /* Define how to find the value returned by a function.
2324 VALTYPE is the data type of the value (as a tree).
2325 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2326 otherwise, FUNC is 0. */
2328 ix86_function_value (valtype
)
2333 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2334 REGPARM_MAX
, SSE_REGPARM_MAX
,
2335 x86_64_int_return_registers
, 0);
2336 /* For zero sized structures, construct_container returns NULL, but we need
to keep rest of compiler happy by returning meaningful value. */
2339 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2343 return gen_rtx_REG (TYPE_MODE (valtype
),
2344 ix86_value_regno (TYPE_MODE (valtype
)));
2347 /* Return false iff type is returned in memory. */
2349 ix86_return_in_memory (type
)
2352 int needed_intregs
, needed_sseregs
;
2355 return !examine_argument (TYPE_MODE (type
), type
, 1,
2356 &needed_intregs
, &needed_sseregs
);
2360 if (TYPE_MODE (type
) == BLKmode
2361 || (VECTOR_MODE_P (TYPE_MODE (type
))
2362 && int_size_in_bytes (type
) == 8)
2363 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2364 && TYPE_MODE (type
) != TFmode
2365 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2371 /* Define how to find the value returned by a library function
2372 assuming the value has mode MODE. */
2374 ix86_libcall_value (mode
)
2375 enum machine_mode mode
;
2385 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2388 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2390 return gen_rtx_REG (mode
, 0);
2394 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2397 /* Given a mode, return the register to use for a return value. */
2400 ix86_value_regno (mode
)
2401 enum machine_mode mode
;
2403 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2404 return FIRST_FLOAT_REG
;
2405 if (mode
== TImode
|| VECTOR_MODE_P (mode
))
2406 return FIRST_SSE_REG
;
2410 /* Create the va_list data type. */
2413 ix86_build_va_list ()
2415 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2417 /* For i386 we use plain pointer to argument area. */
2419 return build_pointer_type (char_type_node
);
2421 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2422 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2424 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2425 unsigned_type_node
);
2426 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2427 unsigned_type_node
);
2428 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2430 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2433 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2434 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2435 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2436 DECL_FIELD_CONTEXT (f_sav
) = record
;
2438 TREE_CHAIN (record
) = type_decl
;
2439 TYPE_NAME (record
) = type_decl
;
2440 TYPE_FIELDS (record
) = f_gpr
;
2441 TREE_CHAIN (f_gpr
) = f_fpr
;
2442 TREE_CHAIN (f_fpr
) = f_ovf
;
2443 TREE_CHAIN (f_ovf
) = f_sav
;
2445 layout_type (record
);
2447 /* The correct type is an array type of one element. */
2448 return build_array_type (record
, build_index_type (size_zero_node
));
2451 /* Perform any needed actions needed for a function that is receiving a
2452 variable number of arguments.
2456 MODE and TYPE are the mode and type of the current parameter.
2458 PRETEND_SIZE is a variable that should be set to the amount of stack
2459 that must be pushed by the prolog to pretend that our caller pushed
2462 Normally, this macro will push all remaining incoming registers on the
2463 stack and set PRETEND_SIZE to the length of the registers pushed. */
2466 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2467 CUMULATIVE_ARGS
*cum
;
2468 enum machine_mode mode
;
2470 int *pretend_size ATTRIBUTE_UNUSED
;
2474 CUMULATIVE_ARGS next_cum
;
2475 rtx save_area
= NULL_RTX
, mem
;
2488 /* Indicate to allocate space on the stack for varargs save area. */
2489 ix86_save_varrargs_registers
= 1;
2491 fntype
= TREE_TYPE (current_function_decl
);
2492 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2493 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2494 != void_type_node
));
2496 /* For varargs, we do not want to skip the dummy va_dcl argument.
2497 For stdargs, we do want to skip the last named argument. */
2500 function_arg_advance (&next_cum
, mode
, type
, 1);
2503 save_area
= frame_pointer_rtx
;
2505 set
= get_varargs_alias_set ();
2507 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2509 mem
= gen_rtx_MEM (Pmode
,
2510 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2511 set_mem_alias_set (mem
, set
);
2512 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2513 x86_64_int_parameter_registers
[i
]));
2516 if (next_cum
.sse_nregs
)
2518 /* Now emit code to save SSE registers. The AX parameter contains number
of SSE parameter registers used to call this function. We use
2520 sse_prologue_save insn template that produces computed jump across
2521 SSE saves. We need some preparation work to get this working. */
2523 label
= gen_label_rtx ();
2524 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2526 /* Compute address to jump to :
2527 label - 5*eax + nnamed_sse_arguments*5 */
2528 tmp_reg
= gen_reg_rtx (Pmode
);
2529 nsse_reg
= gen_reg_rtx (Pmode
);
2530 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2531 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2532 gen_rtx_MULT (Pmode
, nsse_reg
,
2534 if (next_cum
.sse_regno
)
2537 gen_rtx_CONST (DImode
,
2538 gen_rtx_PLUS (DImode
,
2540 GEN_INT (next_cum
.sse_regno
* 4))));
2542 emit_move_insn (nsse_reg
, label_ref
);
2543 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2545 /* Compute address of memory block we save into. We always use pointer
2546 pointing 127 bytes after first byte to store - this is needed to keep
2547 instruction size limited by 4 bytes. */
2548 tmp_reg
= gen_reg_rtx (Pmode
);
2549 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2550 plus_constant (save_area
,
2551 8 * REGPARM_MAX
+ 127)));
2552 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2553 set_mem_alias_set (mem
, set
);
2554 set_mem_align (mem
, BITS_PER_WORD
);
2556 /* And finally do the dirty job! */
2557 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2558 GEN_INT (next_cum
.sse_regno
), label
));
2563 /* Implement va_start. */
2566 ix86_va_start (valist
, nextarg
)
2570 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2571 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2572 tree gpr
, fpr
, ovf
, sav
, t
;
2574 /* Only 64bit target needs something special. */
2577 std_expand_builtin_va_start (valist
, nextarg
);
2581 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2582 f_fpr
= TREE_CHAIN (f_gpr
);
2583 f_ovf
= TREE_CHAIN (f_fpr
);
2584 f_sav
= TREE_CHAIN (f_ovf
);
2586 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2587 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2588 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2589 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2590 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2592 /* Count number of gp and fp argument registers used. */
2593 words
= current_function_args_info
.words
;
2594 n_gpr
= current_function_args_info
.regno
;
2595 n_fpr
= current_function_args_info
.sse_regno
;
2597 if (TARGET_DEBUG_ARG
)
2598 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2599 (int) words
, (int) n_gpr
, (int) n_fpr
);
2601 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2602 build_int_2 (n_gpr
* 8, 0));
2603 TREE_SIDE_EFFECTS (t
) = 1;
2604 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2606 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2607 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2608 TREE_SIDE_EFFECTS (t
) = 1;
2609 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2611 /* Find the overflow area. */
2612 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2614 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2615 build_int_2 (words
* UNITS_PER_WORD
, 0));
2616 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2617 TREE_SIDE_EFFECTS (t
) = 1;
2618 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2620 /* Find the register save area.
2621 Prologue of the function saves it right above the stack frame. */
2622 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2623 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2624 TREE_SIDE_EFFECTS (t
) = 1;
2625 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2628 /* Implement va_arg. */
2630 ix86_va_arg (valist
, type
)
2633 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2634 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2635 tree gpr
, fpr
, ovf
, sav
, t
;
2637 rtx lab_false
, lab_over
= NULL_RTX
;
2641 /* Only 64bit target needs something special. */
2644 return std_expand_builtin_va_arg (valist
, type
);
2647 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2648 f_fpr
= TREE_CHAIN (f_gpr
);
2649 f_ovf
= TREE_CHAIN (f_fpr
);
2650 f_sav
= TREE_CHAIN (f_ovf
);
2652 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2653 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2654 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2655 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2656 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2658 size
= int_size_in_bytes (type
);
2659 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2661 container
= construct_container (TYPE_MODE (type
), type
, 0,
2662 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2664 * Pull the value out of the saved registers ...
2667 addr_rtx
= gen_reg_rtx (Pmode
);
2671 rtx int_addr_rtx
, sse_addr_rtx
;
2672 int needed_intregs
, needed_sseregs
;
2675 lab_over
= gen_label_rtx ();
2676 lab_false
= gen_label_rtx ();
2678 examine_argument (TYPE_MODE (type
), type
, 0,
2679 &needed_intregs
, &needed_sseregs
);
2682 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2683 || TYPE_ALIGN (type
) > 128);
2685 /* In case we are passing structure, verify that it is consecutive block
2686 on the register save area. If not we need to do moves. */
2687 if (!need_temp
&& !REG_P (container
))
2689 /* Verify that all registers are strictly consecutive */
2690 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2694 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2696 rtx slot
= XVECEXP (container
, 0, i
);
2697 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2698 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2706 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2708 rtx slot
= XVECEXP (container
, 0, i
);
2709 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2710 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2717 int_addr_rtx
= addr_rtx
;
2718 sse_addr_rtx
= addr_rtx
;
2722 int_addr_rtx
= gen_reg_rtx (Pmode
);
2723 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2725 /* First ensure that we fit completely in registers. */
2728 emit_cmp_and_jump_insns (expand_expr
2729 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2730 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2731 1) * 8), GE
, const1_rtx
, SImode
,
2736 emit_cmp_and_jump_insns (expand_expr
2737 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2738 GEN_INT ((SSE_REGPARM_MAX
-
2739 needed_sseregs
+ 1) * 16 +
2740 REGPARM_MAX
* 8), GE
, const1_rtx
,
2741 SImode
, 1, lab_false
);
2744 /* Compute index to start of area used for integer regs. */
2747 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2748 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2749 if (r
!= int_addr_rtx
)
2750 emit_move_insn (int_addr_rtx
, r
);
2754 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2755 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2756 if (r
!= sse_addr_rtx
)
2757 emit_move_insn (sse_addr_rtx
, r
);
2764 /* Never use the memory itself, as it has the alias set. */
2765 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2766 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2767 set_mem_alias_set (mem
, get_varargs_alias_set ());
2768 set_mem_align (mem
, BITS_PER_UNIT
);
2770 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2772 rtx slot
= XVECEXP (container
, 0, i
);
2773 rtx reg
= XEXP (slot
, 0);
2774 enum machine_mode mode
= GET_MODE (reg
);
2780 if (SSE_REGNO_P (REGNO (reg
)))
2782 src_addr
= sse_addr_rtx
;
2783 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2787 src_addr
= int_addr_rtx
;
2788 src_offset
= REGNO (reg
) * 8;
2790 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2791 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2792 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2793 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2794 emit_move_insn (dest_mem
, src_mem
);
2801 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2802 build_int_2 (needed_intregs
* 8, 0));
2803 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2804 TREE_SIDE_EFFECTS (t
) = 1;
2805 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2810 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2811 build_int_2 (needed_sseregs
* 16, 0));
2812 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2813 TREE_SIDE_EFFECTS (t
) = 1;
2814 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2817 emit_jump_insn (gen_jump (lab_over
));
2819 emit_label (lab_false
);
2822 /* ... otherwise out of the overflow area. */
2824 /* Care for on-stack alignment if needed. */
2825 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2829 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2830 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2831 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2835 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2837 emit_move_insn (addr_rtx
, r
);
2840 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2841 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2842 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2843 TREE_SIDE_EFFECTS (t
) = 1;
2844 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2847 emit_label (lab_over
);
2852 /* Return nonzero if OP is either a i387 or SSE fp register. */
2854 any_fp_register_operand (op
, mode
)
2856 enum machine_mode mode ATTRIBUTE_UNUSED
;
2858 return ANY_FP_REG_P (op
);
2861 /* Return nonzero if OP is an i387 fp register. */
2863 fp_register_operand (op
, mode
)
2865 enum machine_mode mode ATTRIBUTE_UNUSED
;
2867 return FP_REG_P (op
);
2870 /* Return nonzero if OP is a non-fp register_operand. */
2872 register_and_not_any_fp_reg_operand (op
, mode
)
2874 enum machine_mode mode
;
2876 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
2879 /* Return nonzero if OP is a register operand other than an
2880 i387 fp register. */
2882 register_and_not_fp_reg_operand (op
, mode
)
2884 enum machine_mode mode
;
2886 return register_operand (op
, mode
) && !FP_REG_P (op
);
2889 /* Return nonzero if OP is general operand representable on x86_64. */
2892 x86_64_general_operand (op
, mode
)
2894 enum machine_mode mode
;
2897 return general_operand (op
, mode
);
2898 if (nonimmediate_operand (op
, mode
))
2900 return x86_64_sign_extended_value (op
);
2903 /* Return nonzero if OP is general operand representable on x86_64
2904 as either sign extended or zero extended constant. */
2907 x86_64_szext_general_operand (op
, mode
)
2909 enum machine_mode mode
;
2912 return general_operand (op
, mode
);
2913 if (nonimmediate_operand (op
, mode
))
2915 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2918 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2921 x86_64_nonmemory_operand (op
, mode
)
2923 enum machine_mode mode
;
2926 return nonmemory_operand (op
, mode
);
2927 if (register_operand (op
, mode
))
2929 return x86_64_sign_extended_value (op
);
2932 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2935 x86_64_movabs_operand (op
, mode
)
2937 enum machine_mode mode
;
2939 if (!TARGET_64BIT
|| !flag_pic
)
2940 return nonmemory_operand (op
, mode
);
2941 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2943 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2948 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2951 x86_64_szext_nonmemory_operand (op
, mode
)
2953 enum machine_mode mode
;
2956 return nonmemory_operand (op
, mode
);
2957 if (register_operand (op
, mode
))
2959 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2962 /* Return nonzero if OP is immediate operand representable on x86_64. */
2965 x86_64_immediate_operand (op
, mode
)
2967 enum machine_mode mode
;
2970 return immediate_operand (op
, mode
);
2971 return x86_64_sign_extended_value (op
);
2974 /* Return nonzero if OP is immediate operand representable on x86_64. */
2977 x86_64_zext_immediate_operand (op
, mode
)
2979 enum machine_mode mode ATTRIBUTE_UNUSED
;
2981 return x86_64_zero_extended_value (op
);
2984 /* Return nonzero if OP is (const_int 1), else return zero. */
2987 const_int_1_operand (op
, mode
)
2989 enum machine_mode mode ATTRIBUTE_UNUSED
;
2991 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2994 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2995 for shift & compare patterns, as shifting by 0 does not change flags),
2996 else return zero. */
2999 const_int_1_31_operand (op
, mode
)
3001 enum machine_mode mode ATTRIBUTE_UNUSED
;
3003 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3006 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3007 reference and a constant. */
3010 symbolic_operand (op
, mode
)
3012 enum machine_mode mode ATTRIBUTE_UNUSED
;
3014 switch (GET_CODE (op
))
3022 if (GET_CODE (op
) == SYMBOL_REF
3023 || GET_CODE (op
) == LABEL_REF
3024 || (GET_CODE (op
) == UNSPEC
3025 && (XINT (op
, 1) == UNSPEC_GOT
3026 || XINT (op
, 1) == UNSPEC_GOTOFF
3027 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3029 if (GET_CODE (op
) != PLUS
3030 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3034 if (GET_CODE (op
) == SYMBOL_REF
3035 || GET_CODE (op
) == LABEL_REF
)
3037 /* Only @GOTOFF gets offsets. */
3038 if (GET_CODE (op
) != UNSPEC
3039 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3042 op
= XVECEXP (op
, 0, 0);
3043 if (GET_CODE (op
) == SYMBOL_REF
3044 || GET_CODE (op
) == LABEL_REF
)
3053 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3056 pic_symbolic_operand (op
, mode
)
3058 enum machine_mode mode ATTRIBUTE_UNUSED
;
3060 if (GET_CODE (op
) != CONST
)
3065 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
3070 if (GET_CODE (op
) == UNSPEC
)
3072 if (GET_CODE (op
) != PLUS
3073 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3076 if (GET_CODE (op
) == UNSPEC
)
3082 /* Return true if OP is a symbolic operand that resolves locally. */
3085 local_symbolic_operand (op
, mode
)
3087 enum machine_mode mode ATTRIBUTE_UNUSED
;
3089 if (GET_CODE (op
) == CONST
3090 && GET_CODE (XEXP (op
, 0)) == PLUS
3091 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3092 op
= XEXP (XEXP (op
, 0), 0);
3094 if (GET_CODE (op
) == LABEL_REF
)
3097 if (GET_CODE (op
) != SYMBOL_REF
)
3100 /* These we've been told are local by varasm and encode_section_info
3102 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
3105 /* There is, however, a not insubstantial body of code in the rest of
3106 the compiler that assumes it can just stick the results of
3107 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3108 /* ??? This is a hack. Should update the body of the compiler to
3109 always create a DECL an invoke targetm.encode_section_info. */
3110 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3111 internal_label_prefix_len
) == 0)
3117 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3120 tls_symbolic_operand (op
, mode
)
3122 enum machine_mode mode ATTRIBUTE_UNUSED
;
3124 const char *symbol_str
;
3126 if (GET_CODE (op
) != SYMBOL_REF
)
3128 symbol_str
= XSTR (op
, 0);
3130 if (symbol_str
[0] != '%')
3132 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
3136 tls_symbolic_operand_1 (op
, kind
)
3138 enum tls_model kind
;
3140 const char *symbol_str
;
3142 if (GET_CODE (op
) != SYMBOL_REF
)
3144 symbol_str
= XSTR (op
, 0);
3146 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
3150 global_dynamic_symbolic_operand (op
, mode
)
3152 enum machine_mode mode ATTRIBUTE_UNUSED
;
3154 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3158 local_dynamic_symbolic_operand (op
, mode
)
3160 enum machine_mode mode ATTRIBUTE_UNUSED
;
3162 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3166 initial_exec_symbolic_operand (op
, mode
)
3168 enum machine_mode mode ATTRIBUTE_UNUSED
;
3170 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3174 local_exec_symbolic_operand (op
, mode
)
3176 enum machine_mode mode ATTRIBUTE_UNUSED
;
3178 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3181 /* Test for a valid operand for a call instruction. Don't allow the
3182 arg pointer register or virtual regs since they may decay into
3183 reg + const, which the patterns can't handle. */
3186 call_insn_operand (op
, mode
)
3188 enum machine_mode mode ATTRIBUTE_UNUSED
;
3190 /* Disallow indirect through a virtual register. This leads to
3191 compiler aborts when trying to eliminate them. */
3192 if (GET_CODE (op
) == REG
3193 && (op
== arg_pointer_rtx
3194 || op
== frame_pointer_rtx
3195 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3196 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3199 /* Disallow `call 1234'. Due to varying assembler lameness this
3200 gets either rejected or translated to `call .+1234'. */
3201 if (GET_CODE (op
) == CONST_INT
)
3204 /* Explicitly allow SYMBOL_REF even if pic. */
3205 if (GET_CODE (op
) == SYMBOL_REF
)
3208 /* Otherwise we can allow any general_operand in the address. */
3209 return general_operand (op
, Pmode
);
3213 constant_call_address_operand (op
, mode
)
3215 enum machine_mode mode ATTRIBUTE_UNUSED
;
3217 if (GET_CODE (op
) == CONST
3218 && GET_CODE (XEXP (op
, 0)) == PLUS
3219 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3220 op
= XEXP (XEXP (op
, 0), 0);
3221 return GET_CODE (op
) == SYMBOL_REF
;
3224 /* Match exactly zero and one. */
3227 const0_operand (op
, mode
)
3229 enum machine_mode mode
;
3231 return op
== CONST0_RTX (mode
);
3235 const1_operand (op
, mode
)
3237 enum machine_mode mode ATTRIBUTE_UNUSED
;
3239 return op
== const1_rtx
;
3242 /* Match 2, 4, or 8. Used for leal multiplicands. */
3245 const248_operand (op
, mode
)
3247 enum machine_mode mode ATTRIBUTE_UNUSED
;
3249 return (GET_CODE (op
) == CONST_INT
3250 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3253 /* True if this is a constant appropriate for an increment or decrement. */
3256 incdec_operand (op
, mode
)
3258 enum machine_mode mode ATTRIBUTE_UNUSED
;
3260 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3261 registers, since carry flag is not set. */
3262 if (TARGET_PENTIUM4
&& !optimize_size
)
3264 return op
== const1_rtx
|| op
== constm1_rtx
;
3267 /* Return nonzero if OP is acceptable as operand of DImode shift
3271 shiftdi_operand (op
, mode
)
3273 enum machine_mode mode ATTRIBUTE_UNUSED
;
3276 return nonimmediate_operand (op
, mode
);
3278 return register_operand (op
, mode
);
3281 /* Return false if this is the stack pointer, or any other fake
3282 register eliminable to the stack pointer. Otherwise, this is
3285 This is used to prevent esp from being used as an index reg.
3286 Which would only happen in pathological cases. */
3289 reg_no_sp_operand (op
, mode
)
3291 enum machine_mode mode
;
3294 if (GET_CODE (t
) == SUBREG
)
3296 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3299 return register_operand (op
, mode
);
3303 mmx_reg_operand (op
, mode
)
3305 enum machine_mode mode ATTRIBUTE_UNUSED
;
3307 return MMX_REG_P (op
);
3310 /* Return false if this is any eliminable register. Otherwise
3314 general_no_elim_operand (op
, mode
)
3316 enum machine_mode mode
;
3319 if (GET_CODE (t
) == SUBREG
)
3321 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3322 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3323 || t
== virtual_stack_dynamic_rtx
)
3326 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3327 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3330 return general_operand (op
, mode
);
3333 /* Return false if this is any eliminable register. Otherwise
3334 register_operand or const_int. */
3337 nonmemory_no_elim_operand (op
, mode
)
3339 enum machine_mode mode
;
3342 if (GET_CODE (t
) == SUBREG
)
3344 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3345 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3346 || t
== virtual_stack_dynamic_rtx
)
3349 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3352 /* Return false if this is any eliminable register or stack register,
3353 otherwise work like register_operand. */
3356 index_register_operand (op
, mode
)
3358 enum machine_mode mode
;
3361 if (GET_CODE (t
) == SUBREG
)
3365 if (t
== arg_pointer_rtx
3366 || t
== frame_pointer_rtx
3367 || t
== virtual_incoming_args_rtx
3368 || t
== virtual_stack_vars_rtx
3369 || t
== virtual_stack_dynamic_rtx
3370 || REGNO (t
) == STACK_POINTER_REGNUM
)
3373 return general_operand (op
, mode
);
3376 /* Return true if op is a Q_REGS class register. */
3379 q_regs_operand (op
, mode
)
3381 enum machine_mode mode
;
3383 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3385 if (GET_CODE (op
) == SUBREG
)
3386 op
= SUBREG_REG (op
);
3387 return ANY_QI_REG_P (op
);
3390 /* Return true if op is a NON_Q_REGS class register. */
3393 non_q_regs_operand (op
, mode
)
3395 enum machine_mode mode
;
3397 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3399 if (GET_CODE (op
) == SUBREG
)
3400 op
= SUBREG_REG (op
);
3401 return NON_QI_REG_P (op
);
3404 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3407 sse_comparison_operator (op
, mode
)
3409 enum machine_mode mode ATTRIBUTE_UNUSED
;
3411 enum rtx_code code
= GET_CODE (op
);
3414 /* Operations supported directly. */
3424 /* These are equivalent to ones above in non-IEEE comparisons. */
3431 return !TARGET_IEEE_FP
;
3436 /* Return 1 if OP is a valid comparison operator in valid mode. */
3438 ix86_comparison_operator (op
, mode
)
3440 enum machine_mode mode
;
3442 enum machine_mode inmode
;
3443 enum rtx_code code
= GET_CODE (op
);
3444 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3446 if (GET_RTX_CLASS (code
) != '<')
3448 inmode
= GET_MODE (XEXP (op
, 0));
3450 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3452 enum rtx_code second_code
, bypass_code
;
3453 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3454 return (bypass_code
== NIL
&& second_code
== NIL
);
3461 if (inmode
== CCmode
|| inmode
== CCGCmode
3462 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3465 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3466 if (inmode
== CCmode
)
3470 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3478 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3481 fcmov_comparison_operator (op
, mode
)
3483 enum machine_mode mode
;
3485 enum machine_mode inmode
;
3486 enum rtx_code code
= GET_CODE (op
);
3487 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3489 if (GET_RTX_CLASS (code
) != '<')
3491 inmode
= GET_MODE (XEXP (op
, 0));
3492 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3494 enum rtx_code second_code
, bypass_code
;
3495 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3496 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3498 code
= ix86_fp_compare_code_to_integer (code
);
3500 /* i387 supports just limited amount of conditional codes. */
3503 case LTU
: case GTU
: case LEU
: case GEU
:
3504 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3507 case ORDERED
: case UNORDERED
:
3515 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3518 promotable_binary_operator (op
, mode
)
3520 enum machine_mode mode ATTRIBUTE_UNUSED
;
3522 switch (GET_CODE (op
))
3525 /* Modern CPUs have same latency for HImode and SImode multiply,
3526 but 386 and 486 do HImode multiply faster. */
3527 return ix86_cpu
> PROCESSOR_I486
;
3539 /* Nearly general operand, but accept any const_double, since we wish
3540 to be able to drop them into memory rather than have them get pulled
3544 cmp_fp_expander_operand (op
, mode
)
3546 enum machine_mode mode
;
3548 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3550 if (GET_CODE (op
) == CONST_DOUBLE
)
3552 return general_operand (op
, mode
);
3555 /* Match an SI or HImode register for a zero_extract. */
3558 ext_register_operand (op
, mode
)
3560 enum machine_mode mode ATTRIBUTE_UNUSED
;
3563 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3564 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3567 if (!register_operand (op
, VOIDmode
))
3570 /* Be curefull to accept only registers having upper parts. */
3571 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3572 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3575 /* Return 1 if this is a valid binary floating-point operation.
3576 OP is the expression matched, and MODE is its mode. */
3579 binary_fp_operator (op
, mode
)
3581 enum machine_mode mode
;
3583 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3586 switch (GET_CODE (op
))
3592 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3600 mult_operator (op
, mode
)
3602 enum machine_mode mode ATTRIBUTE_UNUSED
;
3604 return GET_CODE (op
) == MULT
;
3608 div_operator (op
, mode
)
3610 enum machine_mode mode ATTRIBUTE_UNUSED
;
3612 return GET_CODE (op
) == DIV
;
3616 arith_or_logical_operator (op
, mode
)
3618 enum machine_mode mode
;
3620 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3621 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3622 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3625 /* Returns 1 if OP is memory operand with a displacement. */
3628 memory_displacement_operand (op
, mode
)
3630 enum machine_mode mode
;
3632 struct ix86_address parts
;
3634 if (! memory_operand (op
, mode
))
3637 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3640 return parts
.disp
!= NULL_RTX
;
3643 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3644 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3646 ??? It seems likely that this will only work because cmpsi is an
3647 expander, and no actual insns use this. */
3650 cmpsi_operand (op
, mode
)
3652 enum machine_mode mode
;
3654 if (nonimmediate_operand (op
, mode
))
3657 if (GET_CODE (op
) == AND
3658 && GET_MODE (op
) == SImode
3659 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3660 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3661 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3662 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3663 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3664 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3670 /* Returns 1 if OP is memory operand that can not be represented by the
3674 long_memory_operand (op
, mode
)
3676 enum machine_mode mode
;
3678 if (! memory_operand (op
, mode
))
3681 return memory_address_length (op
) != 0;
3684 /* Return nonzero if the rtx is known aligned. */
3687 aligned_operand (op
, mode
)
3689 enum machine_mode mode
;
3691 struct ix86_address parts
;
3693 if (!general_operand (op
, mode
))
3696 /* Registers and immediate operands are always "aligned". */
3697 if (GET_CODE (op
) != MEM
)
3700 /* Don't even try to do any aligned optimizations with volatiles. */
3701 if (MEM_VOLATILE_P (op
))
3706 /* Pushes and pops are only valid on the stack pointer. */
3707 if (GET_CODE (op
) == PRE_DEC
3708 || GET_CODE (op
) == POST_INC
)
3711 /* Decode the address. */
3712 if (! ix86_decompose_address (op
, &parts
))
3715 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3716 parts
.base
= SUBREG_REG (parts
.base
);
3717 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3718 parts
.index
= SUBREG_REG (parts
.index
);
3720 /* Look for some component that isn't known to be aligned. */
3724 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3729 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3734 if (GET_CODE (parts
.disp
) != CONST_INT
3735 || (INTVAL (parts
.disp
) & 3) != 0)
3739 /* Didn't find one -- this must be an aligned address. */
3743 /* Return true if the constant is something that can be loaded with
3744 a special instruction. Only handle 0.0 and 1.0; others are less
3748 standard_80387_constant_p (x
)
3751 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3753 /* Note that on the 80387, other constants, such as pi, that we should support
3754 too. On some machines, these are much slower to load as standard constant,
3755 than to load from doubles in memory. */
3756 if (x
== CONST0_RTX (GET_MODE (x
)))
3758 if (x
== CONST1_RTX (GET_MODE (x
)))
3763 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3766 standard_sse_constant_p (x
)
3769 if (x
== const0_rtx
)
3771 return (x
== CONST0_RTX (GET_MODE (x
)));
3774 /* Returns 1 if OP contains a symbol reference */
3777 symbolic_reference_mentioned_p (op
)
3780 register const char *fmt
;
3783 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3786 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3787 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3793 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3794 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3798 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3805 /* Return 1 if it is appropriate to emit `ret' instructions in the
3806 body of a function. Do this only if the epilogue is simple, needing a
3807 couple of insns. Prior to reloading, we can't tell how many registers
3808 must be saved, so return 0 then. Return 0 if there is no frame
3809 marker to de-allocate.
3811 If NON_SAVING_SETJMP is defined and true, then it is not possible
3812 for the epilogue to be simple, so return 0. This is a special case
3813 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3814 until final, but jump_optimize may need to know sooner if a
3818 ix86_can_use_return_insn_p ()
3820 struct ix86_frame frame
;
3822 #ifdef NON_SAVING_SETJMP
3823 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3827 if (! reload_completed
|| frame_pointer_needed
)
3830 /* Don't allow more than 32 pop, since that's all we can do
3831 with one instruction. */
3832 if (current_function_pops_args
3833 && current_function_args_size
>= 32768)
3836 ix86_compute_frame_layout (&frame
);
3837 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3840 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3842 x86_64_sign_extended_value (value
)
3845 switch (GET_CODE (value
))
3847 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3848 to be at least 32 and this all acceptable constants are
3849 represented as CONST_INT. */
3851 if (HOST_BITS_PER_WIDE_INT
== 32)
3855 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3856 return trunc_int_for_mode (val
, SImode
) == val
;
3860 /* For certain code models, the symbolic references are known to fit.
3861 in CM_SMALL_PIC model we know it fits if it is local to the shared
3862 library. Don't count TLS SYMBOL_REFs here, since they should fit
3863 only if inside of UNSPEC handled below. */
3865 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
3867 /* For certain code models, the code is near as well. */
3869 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
3870 || ix86_cmodel
== CM_KERNEL
);
3872 /* We also may accept the offsetted memory references in certain special
3875 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
3876 switch (XINT (XEXP (value
, 0), 1))
3878 case UNSPEC_GOTPCREL
:
3880 case UNSPEC_GOTNTPOFF
:
3886 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3888 rtx op1
= XEXP (XEXP (value
, 0), 0);
3889 rtx op2
= XEXP (XEXP (value
, 0), 1);
3890 HOST_WIDE_INT offset
;
3892 if (ix86_cmodel
== CM_LARGE
)
3894 if (GET_CODE (op2
) != CONST_INT
)
3896 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3897 switch (GET_CODE (op1
))
3900 /* For CM_SMALL assume that latest object is 16MB before
3901 end of 31bits boundary. We may also accept pretty
3902 large negative constants knowing that all objects are
3903 in the positive half of address space. */
3904 if (ix86_cmodel
== CM_SMALL
3905 && offset
< 16*1024*1024
3906 && trunc_int_for_mode (offset
, SImode
) == offset
)
3908 /* For CM_KERNEL we know that all object resist in the
3909 negative half of 32bits address space. We may not
3910 accept negative offsets, since they may be just off
3911 and we may accept pretty large positive ones. */
3912 if (ix86_cmodel
== CM_KERNEL
3914 && trunc_int_for_mode (offset
, SImode
) == offset
)
3918 /* These conditions are similar to SYMBOL_REF ones, just the
3919 constraints for code models differ. */
3920 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3921 && offset
< 16*1024*1024
3922 && trunc_int_for_mode (offset
, SImode
) == offset
)
3924 if (ix86_cmodel
== CM_KERNEL
3926 && trunc_int_for_mode (offset
, SImode
) == offset
)
3930 switch (XINT (op1
, 1))
3935 && trunc_int_for_mode (offset
, SImode
) == offset
)
3949 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3951 x86_64_zero_extended_value (value
)
3954 switch (GET_CODE (value
))
3957 if (HOST_BITS_PER_WIDE_INT
== 32)
3958 return (GET_MODE (value
) == VOIDmode
3959 && !CONST_DOUBLE_HIGH (value
));
3963 if (HOST_BITS_PER_WIDE_INT
== 32)
3964 return INTVAL (value
) >= 0;
3966 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3969 /* For certain code models, the symbolic references are known to fit. */
3971 return ix86_cmodel
== CM_SMALL
;
3973 /* For certain code models, the code is near as well. */
3975 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3977 /* We also may accept the offsetted memory references in certain special
3980 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3982 rtx op1
= XEXP (XEXP (value
, 0), 0);
3983 rtx op2
= XEXP (XEXP (value
, 0), 1);
3985 if (ix86_cmodel
== CM_LARGE
)
3987 switch (GET_CODE (op1
))
3991 /* For small code model we may accept pretty large positive
3992 offsets, since one bit is available for free. Negative
3993 offsets are limited by the size of NULL pointer area
3994 specified by the ABI. */
3995 if (ix86_cmodel
== CM_SMALL
3996 && GET_CODE (op2
) == CONST_INT
3997 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3998 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4001 /* ??? For the kernel, we may accept adjustment of
4002 -0x10000000, since we know that it will just convert
4003 negative address space to positive, but perhaps this
4004 is not worthwhile. */
4007 /* These conditions are similar to SYMBOL_REF ones, just the
4008 constraints for code models differ. */
4009 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4010 && GET_CODE (op2
) == CONST_INT
4011 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4012 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4026 /* Value should be nonzero if functions must have frame pointers.
4027 Zero means the frame pointer need not be set up (and parms may
4028 be accessed via the stack pointer) in functions that seem suitable. */
4031 ix86_frame_pointer_required ()
4033 /* If we accessed previous frames, then the generated code expects
4034 to be able to access the saved ebp value in our frame. */
4035 if (cfun
->machine
->accesses_prev_frame
)
4038 /* Several x86 os'es need a frame pointer for other reasons,
4039 usually pertaining to setjmp. */
4040 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4043 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4044 the frame pointer by default. Turn it back on now if we've not
4045 got a leaf function. */
4046 if (TARGET_OMIT_LEAF_FRAME_POINTER
4047 && (!current_function_is_leaf
))
4050 if (current_function_profile
)
4056 /* Record that the current function accesses previous call frames. */
4059 ix86_setup_frame_addresses ()
4061 cfun
->machine
->accesses_prev_frame
= 1;
4064 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4065 # define USE_HIDDEN_LINKONCE 1
4067 # define USE_HIDDEN_LINKONCE 0
4070 static int pic_labels_used
;
4072 /* Fills in the label name that should be used for a pc thunk for
4073 the given register. */
4076 get_pc_thunk_name (name
, regno
)
4080 if (USE_HIDDEN_LINKONCE
)
4081 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4083 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4087 /* This function generates code for -fpic that loads %ebx with
4088 the return address of the caller and then returns. */
4091 ix86_asm_file_end (file
)
4097 for (regno
= 0; regno
< 8; ++regno
)
4101 if (! ((pic_labels_used
>> regno
) & 1))
4104 get_pc_thunk_name (name
, regno
);
4106 if (USE_HIDDEN_LINKONCE
)
4110 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4112 TREE_PUBLIC (decl
) = 1;
4113 TREE_STATIC (decl
) = 1;
4114 DECL_ONE_ONLY (decl
) = 1;
4116 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4117 named_section (decl
, NULL
, 0);
4119 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4120 fputs ("\t.hidden\t", file
);
4121 assemble_name (file
, name
);
4123 ASM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
4128 ASM_OUTPUT_LABEL (file
, name
);
4131 xops
[0] = gen_rtx_REG (SImode
, regno
);
4132 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4133 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4134 output_asm_insn ("ret", xops
);
4138 /* Emit code for the SET_GOT patterns. */
4141 output_set_got (dest
)
4147 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4149 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4151 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4154 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4156 output_asm_insn ("call\t%a2", xops
);
4159 /* Output the "canonical" label name ("Lxx$pb") here too. This
4160 is what will be referred to by the Mach-O PIC subsystem. */
4161 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4163 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, "L",
4164 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4167 output_asm_insn ("pop{l}\t%0", xops
);
4172 get_pc_thunk_name (name
, REGNO (dest
));
4173 pic_labels_used
|= 1 << REGNO (dest
);
4175 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4176 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4177 output_asm_insn ("call\t%X2", xops
);
4180 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4181 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4182 else if (!TARGET_MACHO
)
4183 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4188 /* Generate an "push" pattern for input ARG. */
4194 return gen_rtx_SET (VOIDmode
,
4196 gen_rtx_PRE_DEC (Pmode
,
4197 stack_pointer_rtx
)),
4201 /* Return >= 0 if there is an unused call-clobbered register available
4202 for the entire function. */
4205 ix86_select_alt_pic_regnum ()
4207 if (current_function_is_leaf
&& !current_function_profile
)
4210 for (i
= 2; i
>= 0; --i
)
4211 if (!regs_ever_live
[i
])
4215 return INVALID_REGNUM
;
4218 /* Return 1 if we need to save REGNO. */
4220 ix86_save_reg (regno
, maybe_eh_return
)
4222 int maybe_eh_return
;
4224 if (pic_offset_table_rtx
4225 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4226 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4227 || current_function_profile
4228 || current_function_calls_eh_return
))
4230 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4235 if (current_function_calls_eh_return
&& maybe_eh_return
)
4240 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4241 if (test
== INVALID_REGNUM
)
4248 return (regs_ever_live
[regno
]
4249 && !call_used_regs
[regno
]
4250 && !fixed_regs
[regno
]
4251 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4254 /* Return number of registers to be saved on the stack. */
4262 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4263 if (ix86_save_reg (regno
, true))
4268 /* Return the offset between two registers, one to be eliminated, and the other
4269 its replacement, at the start of a routine. */
4272 ix86_initial_elimination_offset (from
, to
)
4276 struct ix86_frame frame
;
4277 ix86_compute_frame_layout (&frame
);
4279 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4280 return frame
.hard_frame_pointer_offset
;
4281 else if (from
== FRAME_POINTER_REGNUM
4282 && to
== HARD_FRAME_POINTER_REGNUM
)
4283 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4286 if (to
!= STACK_POINTER_REGNUM
)
4288 else if (from
== ARG_POINTER_REGNUM
)
4289 return frame
.stack_pointer_offset
;
4290 else if (from
!= FRAME_POINTER_REGNUM
)
4293 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4297 /* Fill structure ix86_frame about frame of currently computed function. */
4300 ix86_compute_frame_layout (frame
)
4301 struct ix86_frame
*frame
;
4303 HOST_WIDE_INT total_size
;
4304 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4306 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4307 HOST_WIDE_INT size
= get_frame_size ();
4309 frame
->nregs
= ix86_nsaved_regs ();
4312 /* Skip return address and saved base pointer. */
4313 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4315 frame
->hard_frame_pointer_offset
= offset
;
4317 /* Do some sanity checking of stack_alignment_needed and
4318 preferred_alignment, since i386 port is the only using those features
4319 that may break easily. */
4321 if (size
&& !stack_alignment_needed
)
4323 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4325 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4327 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4330 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4331 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4333 /* Register save area */
4334 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4337 if (ix86_save_varrargs_registers
)
4339 offset
+= X86_64_VARARGS_SIZE
;
4340 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4343 frame
->va_arg_size
= 0;
4345 /* Align start of frame for local function. */
4346 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4347 & -stack_alignment_needed
) - offset
;
4349 offset
+= frame
->padding1
;
4351 /* Frame pointer points here. */
4352 frame
->frame_pointer_offset
= offset
;
4356 /* Add outgoing arguments area. Can be skipped if we eliminated
4357 all the function calls as dead code. */
4358 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4360 offset
+= current_function_outgoing_args_size
;
4361 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4364 frame
->outgoing_arguments_size
= 0;
4366 /* Align stack boundary. Only needed if we're calling another function
4368 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4369 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4370 & -preferred_alignment
) - offset
;
4372 frame
->padding2
= 0;
4374 offset
+= frame
->padding2
;
4376 /* We've reached end of stack frame. */
4377 frame
->stack_pointer_offset
= offset
;
4379 /* Size prologue needs to allocate. */
4380 frame
->to_allocate
=
4381 (size
+ frame
->padding1
+ frame
->padding2
4382 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4384 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4385 && current_function_is_leaf
)
4387 frame
->red_zone_size
= frame
->to_allocate
;
4388 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4389 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4392 frame
->red_zone_size
= 0;
4393 frame
->to_allocate
-= frame
->red_zone_size
;
4394 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4396 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4397 fprintf (stderr
, "size: %i\n", size
);
4398 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4399 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4400 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4401 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4402 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4403 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4404 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4405 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4406 frame
->hard_frame_pointer_offset
);
4407 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4411 /* Emit code to save registers in the prologue. */
4414 ix86_emit_save_regs ()
4419 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4420 if (ix86_save_reg (regno
, true))
4422 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4423 RTX_FRAME_RELATED_P (insn
) = 1;
4427 /* Emit code to save registers using MOV insns. First register
4428 is restored from POINTER + OFFSET. */
4430 ix86_emit_save_regs_using_mov (pointer
, offset
)
4432 HOST_WIDE_INT offset
;
4437 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4438 if (ix86_save_reg (regno
, true))
4440 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4442 gen_rtx_REG (Pmode
, regno
));
4443 RTX_FRAME_RELATED_P (insn
) = 1;
4444 offset
+= UNITS_PER_WORD
;
4448 /* Expand the prologue into a bunch of separate insns. */
4451 ix86_expand_prologue ()
4455 struct ix86_frame frame
;
4457 HOST_WIDE_INT allocate
;
4461 use_fast_prologue_epilogue
4462 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4463 if (TARGET_PROLOGUE_USING_MOVE
)
4464 use_mov
= use_fast_prologue_epilogue
;
4466 ix86_compute_frame_layout (&frame
);
4468 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4469 slower on all targets. Also sdb doesn't like it. */
4471 if (frame_pointer_needed
)
4473 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4474 RTX_FRAME_RELATED_P (insn
) = 1;
4476 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4477 RTX_FRAME_RELATED_P (insn
) = 1;
4480 allocate
= frame
.to_allocate
;
4481 /* In case we are dealing only with single register and empty frame,
4482 push is equivalent of the mov+add sequence. */
4483 if (allocate
== 0 && frame
.nregs
<= 1)
4487 ix86_emit_save_regs ();
4489 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4493 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4495 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4496 (stack_pointer_rtx
, stack_pointer_rtx
,
4497 GEN_INT (-allocate
)));
4498 RTX_FRAME_RELATED_P (insn
) = 1;
4502 /* ??? Is this only valid for Win32? */
4509 arg0
= gen_rtx_REG (SImode
, 0);
4510 emit_move_insn (arg0
, GEN_INT (allocate
));
4512 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4513 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4514 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4516 CALL_INSN_FUNCTION_USAGE (insn
)
4517 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4518 CALL_INSN_FUNCTION_USAGE (insn
));
4522 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4523 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4525 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4526 -frame
.nregs
* UNITS_PER_WORD
);
4529 #ifdef SUBTARGET_PROLOGUE
4533 pic_reg_used
= false;
4534 if (pic_offset_table_rtx
4535 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4536 || current_function_profile
))
4538 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4540 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4541 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4543 pic_reg_used
= true;
4548 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4550 /* Even with accurate pre-reload life analysis, we can wind up
4551 deleting all references to the pic register after reload.
4552 Consider if cross-jumping unifies two sides of a branch
4553 controled by a comparison vs the only read from a global.
4554 In which case, allow the set_got to be deleted, though we're
4555 too late to do anything about the ebx save in the prologue. */
4556 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4559 /* Prevent function calls from be scheduled before the call to mcount.
4560 In the pic_reg_used case, make sure that the got load isn't deleted. */
4561 if (current_function_profile
)
4562 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4565 /* Emit code to restore saved registers using MOV insns. First register
4566 is restored from POINTER + OFFSET. */
4568 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4571 int maybe_eh_return
;
4575 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4576 if (ix86_save_reg (regno
, maybe_eh_return
))
4578 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4579 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4581 offset
+= UNITS_PER_WORD
;
4585 /* Restore function stack, frame, and registers. */
4588 ix86_expand_epilogue (style
)
4592 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4593 struct ix86_frame frame
;
4594 HOST_WIDE_INT offset
;
4596 ix86_compute_frame_layout (&frame
);
4598 /* Calculate start of saved registers relative to ebp. Special care
4599 must be taken for the normal return case of a function using
4600 eh_return: the eax and edx registers are marked as saved, but not
4601 restored along this path. */
4602 offset
= frame
.nregs
;
4603 if (current_function_calls_eh_return
&& style
!= 2)
4605 offset
*= -UNITS_PER_WORD
;
4607 /* If we're only restoring one register and sp is not valid then
4608 using a move instruction to restore the register since it's
4609 less work than reloading sp and popping the register.
4611 The default code result in stack adjustment using add/lea instruction,
4612 while this code results in LEAVE instruction (or discrete equivalent),
4613 so it is profitable in some other cases as well. Especially when there
4614 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4615 and there is exactly one register to pop. This heruistic may need some
4616 tuning in future. */
4617 if ((!sp_valid
&& frame
.nregs
<= 1)
4618 || (TARGET_EPILOGUE_USING_MOVE
4619 && use_fast_prologue_epilogue
4620 && (frame
.nregs
> 1 || frame
.to_allocate
))
4621 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4622 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4623 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4624 || current_function_calls_eh_return
)
4626 /* Restore registers. We can use ebp or esp to address the memory
4627 locations. If both are available, default to ebp, since offsets
4628 are known to be small. Only exception is esp pointing directly to the
4629 end of block of saved registers, where we may simplify addressing
4632 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4633 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4634 frame
.to_allocate
, style
== 2);
4636 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4637 offset
, style
== 2);
4639 /* eh_return epilogues need %ecx added to the stack pointer. */
4642 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4644 if (frame_pointer_needed
)
4646 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4647 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4648 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4650 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4651 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4653 emit_insn (gen_pro_epilogue_adjust_stack
4654 (stack_pointer_rtx
, sa
, const0_rtx
));
4658 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4659 tmp
= plus_constant (tmp
, (frame
.to_allocate
4660 + frame
.nregs
* UNITS_PER_WORD
));
4661 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4664 else if (!frame_pointer_needed
)
4665 emit_insn (gen_pro_epilogue_adjust_stack
4666 (stack_pointer_rtx
, stack_pointer_rtx
,
4667 GEN_INT (frame
.to_allocate
4668 + frame
.nregs
* UNITS_PER_WORD
)));
4669 /* If not an i386, mov & pop is faster than "leave". */
4670 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4671 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4674 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4675 hard_frame_pointer_rtx
,
4678 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4680 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4685 /* First step is to deallocate the stack frame so that we can
4686 pop the registers. */
4689 if (!frame_pointer_needed
)
4691 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4692 hard_frame_pointer_rtx
,
4695 else if (frame
.to_allocate
)
4696 emit_insn (gen_pro_epilogue_adjust_stack
4697 (stack_pointer_rtx
, stack_pointer_rtx
,
4698 GEN_INT (frame
.to_allocate
)));
4700 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4701 if (ix86_save_reg (regno
, false))
4704 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4706 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4708 if (frame_pointer_needed
)
4710 /* Leave results in shorter dependency chains on CPUs that are
4711 able to grok it fast. */
4712 if (TARGET_USE_LEAVE
)
4713 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4714 else if (TARGET_64BIT
)
4715 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4717 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4721 /* Sibcall epilogues don't want a return instruction. */
4725 if (current_function_pops_args
&& current_function_args_size
)
4727 rtx popc
= GEN_INT (current_function_pops_args
);
4729 /* i386 can only pop 64K bytes. If asked to pop more, pop
4730 return address, do explicit add, and jump indirectly to the
4733 if (current_function_pops_args
>= 65536)
4735 rtx ecx
= gen_rtx_REG (SImode
, 2);
4737 /* There are is no "pascal" calling convention in 64bit ABI. */
4741 emit_insn (gen_popsi1 (ecx
));
4742 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4743 emit_jump_insn (gen_return_indirect_internal (ecx
));
4746 emit_jump_insn (gen_return_pop_internal (popc
));
4749 emit_jump_insn (gen_return_internal ());
4752 /* Reset from the function's potential modifications. */
4755 ix86_output_function_epilogue (file
, size
)
4756 FILE *file ATTRIBUTE_UNUSED
;
4757 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
4759 if (pic_offset_table_rtx
)
4760 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4763 /* Extract the parts of an RTL expression that is a valid memory address
4764 for an instruction. Return 0 if the structure of the address is
4765 grossly off. Return -1 if the address contains ASHIFT, so it is not
4766 strictly valid, but still used for computing length of lea instruction.
4770 ix86_decompose_address (addr
, out
)
4772 struct ix86_address
*out
;
4774 rtx base
= NULL_RTX
;
4775 rtx index
= NULL_RTX
;
4776 rtx disp
= NULL_RTX
;
4777 HOST_WIDE_INT scale
= 1;
4778 rtx scale_rtx
= NULL_RTX
;
4781 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
4783 else if (GET_CODE (addr
) == PLUS
)
4785 rtx op0
= XEXP (addr
, 0);
4786 rtx op1
= XEXP (addr
, 1);
4787 enum rtx_code code0
= GET_CODE (op0
);
4788 enum rtx_code code1
= GET_CODE (op1
);
4790 if (code0
== REG
|| code0
== SUBREG
)
4792 if (code1
== REG
|| code1
== SUBREG
)
4793 index
= op0
, base
= op1
; /* index + base */
4795 base
= op0
, disp
= op1
; /* base + displacement */
4797 else if (code0
== MULT
)
4799 index
= XEXP (op0
, 0);
4800 scale_rtx
= XEXP (op0
, 1);
4801 if (code1
== REG
|| code1
== SUBREG
)
4802 base
= op1
; /* index*scale + base */
4804 disp
= op1
; /* index*scale + disp */
4806 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4808 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4809 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4810 base
= XEXP (op0
, 1);
4813 else if (code0
== PLUS
)
4815 index
= XEXP (op0
, 0); /* index + base + disp */
4816 base
= XEXP (op0
, 1);
4822 else if (GET_CODE (addr
) == MULT
)
4824 index
= XEXP (addr
, 0); /* index*scale */
4825 scale_rtx
= XEXP (addr
, 1);
4827 else if (GET_CODE (addr
) == ASHIFT
)
4831 /* We're called for lea too, which implements ashift on occasion. */
4832 index
= XEXP (addr
, 0);
4833 tmp
= XEXP (addr
, 1);
4834 if (GET_CODE (tmp
) != CONST_INT
)
4836 scale
= INTVAL (tmp
);
4837 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4843 disp
= addr
; /* displacement */
4845 /* Extract the integral value of scale. */
4848 if (GET_CODE (scale_rtx
) != CONST_INT
)
4850 scale
= INTVAL (scale_rtx
);
4853 /* Allow arg pointer and stack pointer as index if there is not scaling */
4854 if (base
&& index
&& scale
== 1
4855 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4856 || index
== stack_pointer_rtx
))
4863 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4864 if ((base
== hard_frame_pointer_rtx
4865 || base
== frame_pointer_rtx
4866 || base
== arg_pointer_rtx
) && !disp
)
4869 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4870 Avoid this by transforming to [%esi+0]. */
4871 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4872 && base
&& !index
&& !disp
4874 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4877 /* Special case: encode reg+reg instead of reg*2. */
4878 if (!base
&& index
&& scale
&& scale
== 2)
4879 base
= index
, scale
= 1;
4881 /* Special case: scaling cannot be encoded without base or displacement. */
4882 if (!base
&& !disp
&& index
&& scale
!= 1)
4893 /* Return cost of the memory address x.
4894 For i386, it is better to use a complex address than let gcc copy
4895 the address into a reg and make a new pseudo. But not if the address
4896 requires to two regs - that would mean more pseudos with longer
4899 ix86_address_cost (x
)
4902 struct ix86_address parts
;
4905 if (!ix86_decompose_address (x
, &parts
))
4908 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
4909 parts
.base
= SUBREG_REG (parts
.base
);
4910 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
4911 parts
.index
= SUBREG_REG (parts
.index
);
4913 /* More complex memory references are better. */
4914 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4917 /* Attempt to minimize number of registers in the address. */
4919 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4921 && (!REG_P (parts
.index
)
4922 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4926 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4928 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4929 && parts
.base
!= parts
.index
)
4932 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4933 since it's predecode logic can't detect the length of instructions
4934 and it degenerates to vector decoded. Increase cost of such
4935 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4936 to split such addresses or even refuse such addresses at all.
4938 Following addressing modes are affected:
4943 The first and last case may be avoidable by explicitly coding the zero in
4944 memory address, but I don't have AMD-K6 machine handy to check this
4948 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4949 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4950 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4956 /* If X is a machine specific address (i.e. a symbol or label being
4957 referenced as a displacement from the GOT implemented using an
4958 UNSPEC), then return the base term. Otherwise return X. */
4961 ix86_find_base_term (x
)
4968 if (GET_CODE (x
) != CONST
)
4971 if (GET_CODE (term
) == PLUS
4972 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4973 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4974 term
= XEXP (term
, 0);
4975 if (GET_CODE (term
) != UNSPEC
4976 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4979 term
= XVECEXP (term
, 0, 0);
4981 if (GET_CODE (term
) != SYMBOL_REF
4982 && GET_CODE (term
) != LABEL_REF
)
4988 if (GET_CODE (x
) != PLUS
4989 || XEXP (x
, 0) != pic_offset_table_rtx
4990 || GET_CODE (XEXP (x
, 1)) != CONST
)
4993 term
= XEXP (XEXP (x
, 1), 0);
4995 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
4996 term
= XEXP (term
, 0);
4998 if (GET_CODE (term
) != UNSPEC
4999 || XINT (term
, 1) != UNSPEC_GOTOFF
)
5002 term
= XVECEXP (term
, 0, 0);
5004 if (GET_CODE (term
) != SYMBOL_REF
5005 && GET_CODE (term
) != LABEL_REF
)
5011 /* Determine if a given RTX is a valid constant. We already know this
5012 satisfies CONSTANT_P. */
5015 legitimate_constant_p (x
)
5020 switch (GET_CODE (x
))
5023 /* TLS symbols are not constant. */
5024 if (tls_symbolic_operand (x
, Pmode
))
5029 inner
= XEXP (x
, 0);
5031 /* Offsets of TLS symbols are never valid.
5032 Discourage CSE from creating them. */
5033 if (GET_CODE (inner
) == PLUS
5034 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5037 /* Only some unspecs are valid as "constants". */
5038 if (GET_CODE (inner
) == UNSPEC
)
5039 switch (XINT (inner
, 1))
5042 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5052 /* Otherwise we handle everything else in the move patterns. */
5056 /* Determine if it's legal to put X into the constant pool. This
5057 is not possible for the address of thread-local symbols, which
5058 is checked above. */
5061 ix86_cannot_force_const_mem (x
)
5064 return !legitimate_constant_p (x
);
5067 /* Determine if a given RTX is a valid constant address. */
5070 constant_address_p (x
)
5073 switch (GET_CODE (x
))
5080 return TARGET_64BIT
;
5083 /* For Mach-O, really believe the CONST. */
5086 /* Otherwise fall through. */
5088 return !flag_pic
&& legitimate_constant_p (x
);
5095 /* Nonzero if the constant value X is a legitimate general operand
5096 when generating PIC code. It is given that flag_pic is on and
5097 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5100 legitimate_pic_operand_p (x
)
5105 switch (GET_CODE (x
))
5108 inner
= XEXP (x
, 0);
5110 /* Only some unspecs are valid as "constants". */
5111 if (GET_CODE (inner
) == UNSPEC
)
5112 switch (XINT (inner
, 1))
5115 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5123 return legitimate_pic_address_disp_p (x
);
5130 /* Determine if a given CONST RTX is a valid memory displacement
5134 legitimate_pic_address_disp_p (disp
)
5139 /* In 64bit mode we can allow direct addresses of symbols and labels
5140 when they are not dynamic symbols. */
5143 /* TLS references should always be enclosed in UNSPEC. */
5144 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5146 if (GET_CODE (disp
) == SYMBOL_REF
5147 && ix86_cmodel
== CM_SMALL_PIC
5148 && (CONSTANT_POOL_ADDRESS_P (disp
)
5149 || SYMBOL_REF_FLAG (disp
)))
5151 if (GET_CODE (disp
) == LABEL_REF
)
5153 if (GET_CODE (disp
) == CONST
5154 && GET_CODE (XEXP (disp
, 0)) == PLUS
5155 && ((GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
5156 && ix86_cmodel
== CM_SMALL_PIC
5157 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp
, 0), 0))
5158 || SYMBOL_REF_FLAG (XEXP (XEXP (disp
, 0), 0))))
5159 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
5160 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
5161 && INTVAL (XEXP (XEXP (disp
, 0), 1)) < 16*1024*1024
5162 && INTVAL (XEXP (XEXP (disp
, 0), 1)) >= -16*1024*1024)
5165 if (GET_CODE (disp
) != CONST
)
5167 disp
= XEXP (disp
, 0);
5171 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5172 of GOT tables. We should not need these anyway. */
5173 if (GET_CODE (disp
) != UNSPEC
5174 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5177 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5178 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5184 if (GET_CODE (disp
) == PLUS
)
5186 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5188 disp
= XEXP (disp
, 0);
5192 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5193 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
5195 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5196 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5197 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5199 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5200 if (strstr (sym_name
, "$pb") != 0)
5205 if (GET_CODE (disp
) != UNSPEC
)
5208 switch (XINT (disp
, 1))
5213 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5215 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5216 case UNSPEC_GOTTPOFF
:
5217 case UNSPEC_GOTNTPOFF
:
5218 case UNSPEC_INDNTPOFF
:
5221 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5223 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5225 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5231 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5232 memory address for an instruction. The MODE argument is the machine mode
5233 for the MEM expression that wants to use this address.
5235 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5236 convert common non-canonical forms to canonical form so that they will
5240 legitimate_address_p (mode
, addr
, strict
)
5241 enum machine_mode mode
;
5245 struct ix86_address parts
;
5246 rtx base
, index
, disp
;
5247 HOST_WIDE_INT scale
;
5248 const char *reason
= NULL
;
5249 rtx reason_rtx
= NULL_RTX
;
5251 if (TARGET_DEBUG_ADDR
)
5254 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5255 GET_MODE_NAME (mode
), strict
);
5259 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
5261 if (TARGET_DEBUG_ADDR
)
5262 fprintf (stderr
, "Success.\n");
5266 if (ix86_decompose_address (addr
, &parts
) <= 0)
5268 reason
= "decomposition failed";
5273 index
= parts
.index
;
5275 scale
= parts
.scale
;
5277 /* Validate base register.
5279 Don't allow SUBREG's here, it can lead to spill failures when the base
5280 is one word out of a two word structure, which is represented internally
5288 if (GET_CODE (base
) == SUBREG
)
5289 reg
= SUBREG_REG (base
);
5293 if (GET_CODE (reg
) != REG
)
5295 reason
= "base is not a register";
5299 if (GET_MODE (base
) != Pmode
)
5301 reason
= "base is not in Pmode";
5305 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
5306 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5308 reason
= "base is not valid";
5313 /* Validate index register.
5315 Don't allow SUBREG's here, it can lead to spill failures when the index
5316 is one word out of a two word structure, which is represented internally
5324 if (GET_CODE (index
) == SUBREG
)
5325 reg
= SUBREG_REG (index
);
5329 if (GET_CODE (reg
) != REG
)
5331 reason
= "index is not a register";
5335 if (GET_MODE (index
) != Pmode
)
5337 reason
= "index is not in Pmode";
5341 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5342 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5344 reason
= "index is not valid";
5349 /* Validate scale factor. */
5352 reason_rtx
= GEN_INT (scale
);
5355 reason
= "scale without index";
5359 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5361 reason
= "scale is not a valid multiplier";
5366 /* Validate displacement. */
5371 if (GET_CODE (disp
) == CONST
5372 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5373 switch (XINT (XEXP (disp
, 0), 1))
5377 case UNSPEC_GOTPCREL
:
5380 goto is_legitimate_pic
;
5382 case UNSPEC_GOTTPOFF
:
5383 case UNSPEC_GOTNTPOFF
:
5384 case UNSPEC_INDNTPOFF
:
5390 reason
= "invalid address unspec";
5394 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5396 && !machopic_operand_p (disp
)
5401 if (TARGET_64BIT
&& (index
|| base
))
5403 /* foo@dtpoff(%rX) is ok. */
5404 if (GET_CODE (disp
) != CONST
5405 || GET_CODE (XEXP (disp
, 0)) != PLUS
5406 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5407 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5408 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5409 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5411 reason
= "non-constant pic memory reference";
5415 else if (! legitimate_pic_address_disp_p (disp
))
5417 reason
= "displacement is an invalid pic construct";
5421 /* This code used to verify that a symbolic pic displacement
5422 includes the pic_offset_table_rtx register.
5424 While this is good idea, unfortunately these constructs may
5425 be created by "adds using lea" optimization for incorrect
5434 This code is nonsensical, but results in addressing
5435 GOT table with pic_offset_table_rtx base. We can't
5436 just refuse it easily, since it gets matched by
5437 "addsi3" pattern, that later gets split to lea in the
5438 case output register differs from input. While this
5439 can be handled by separate addsi pattern for this case
5440 that never results in lea, this seems to be easier and
5441 correct fix for crash to disable this test. */
5443 else if (!CONSTANT_ADDRESS_P (disp
))
5445 reason
= "displacement is not constant";
5448 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
5450 reason
= "displacement is out of range";
5453 else if (!TARGET_64BIT
&& GET_CODE (disp
) == CONST_DOUBLE
)
5455 reason
= "displacement is a const_double";
5460 /* Everything looks valid. */
5461 if (TARGET_DEBUG_ADDR
)
5462 fprintf (stderr
, "Success.\n");
5466 if (TARGET_DEBUG_ADDR
)
5468 fprintf (stderr
, "Error: %s\n", reason
);
5469 debug_rtx (reason_rtx
);
5474 /* Return an unique alias set for the GOT. */
5476 static HOST_WIDE_INT
5477 ix86_GOT_alias_set ()
5479 static HOST_WIDE_INT set
= -1;
5481 set
= new_alias_set ();
5485 /* Return a legitimate reference for ORIG (an address) using the
5486 register REG. If REG is 0, a new pseudo is generated.
5488 There are two types of references that must be handled:
5490 1. Global data references must load the address from the GOT, via
5491 the PIC reg. An insn is emitted to do this load, and the reg is
5494 2. Static data references, constant pool addresses, and code labels
5495 compute the address as an offset from the GOT, whose base is in
5496 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5497 differentiate them from global data objects. The returned
5498 address is the PIC reg + an unspec constant.
5500 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5501 reg also appears in the address. */
5504 legitimize_pic_address (orig
, reg
)
5514 reg
= gen_reg_rtx (Pmode
);
5515 /* Use the generic Mach-O PIC machinery. */
5516 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5519 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
5521 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
5523 /* This symbol may be referenced via a displacement from the PIC
5524 base address (@GOTOFF). */
5526 if (reload_in_progress
)
5527 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5528 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5529 new = gen_rtx_CONST (Pmode
, new);
5530 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5534 emit_move_insn (reg
, new);
5538 else if (GET_CODE (addr
) == SYMBOL_REF
)
5542 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5543 new = gen_rtx_CONST (Pmode
, new);
5544 new = gen_rtx_MEM (Pmode
, new);
5545 RTX_UNCHANGING_P (new) = 1;
5546 set_mem_alias_set (new, ix86_GOT_alias_set ());
5549 reg
= gen_reg_rtx (Pmode
);
5550 /* Use directly gen_movsi, otherwise the address is loaded
5551 into register for CSE. We don't want to CSE this addresses,
5552 instead we CSE addresses from the GOT table, so skip this. */
5553 emit_insn (gen_movsi (reg
, new));
5558 /* This symbol must be referenced via a load from the
5559 Global Offset Table (@GOT). */
5561 if (reload_in_progress
)
5562 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5563 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5564 new = gen_rtx_CONST (Pmode
, new);
5565 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5566 new = gen_rtx_MEM (Pmode
, new);
5567 RTX_UNCHANGING_P (new) = 1;
5568 set_mem_alias_set (new, ix86_GOT_alias_set ());
5571 reg
= gen_reg_rtx (Pmode
);
5572 emit_move_insn (reg
, new);
5578 if (GET_CODE (addr
) == CONST
)
5580 addr
= XEXP (addr
, 0);
5582 /* We must match stuff we generate before. Assume the only
5583 unspecs that can get here are ours. Not that we could do
5584 anything with them anyway... */
5585 if (GET_CODE (addr
) == UNSPEC
5586 || (GET_CODE (addr
) == PLUS
5587 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5589 if (GET_CODE (addr
) != PLUS
)
5592 if (GET_CODE (addr
) == PLUS
)
5594 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5596 /* Check first to see if this is a constant offset from a @GOTOFF
5597 symbol reference. */
5598 if (local_symbolic_operand (op0
, Pmode
)
5599 && GET_CODE (op1
) == CONST_INT
)
5603 if (reload_in_progress
)
5604 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5605 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5607 new = gen_rtx_PLUS (Pmode
, new, op1
);
5608 new = gen_rtx_CONST (Pmode
, new);
5609 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5613 emit_move_insn (reg
, new);
5619 if (INTVAL (op1
) < -16*1024*1024
5620 || INTVAL (op1
) >= 16*1024*1024)
5621 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
5626 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5627 new = legitimize_pic_address (XEXP (addr
, 1),
5628 base
== reg
? NULL_RTX
: reg
);
5630 if (GET_CODE (new) == CONST_INT
)
5631 new = plus_constant (base
, INTVAL (new));
5634 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5636 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5637 new = XEXP (new, 1);
5639 new = gen_rtx_PLUS (Pmode
, base
, new);
5648 ix86_encode_section_info (decl
, first
)
5650 int first ATTRIBUTE_UNUSED
;
5652 bool local_p
= (*targetm
.binds_local_p
) (decl
);
5655 rtl
= DECL_P (decl
) ? DECL_RTL (decl
) : TREE_CST_RTL (decl
);
5656 if (GET_CODE (rtl
) != MEM
)
5658 symbol
= XEXP (rtl
, 0);
5659 if (GET_CODE (symbol
) != SYMBOL_REF
)
5662 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5663 symbol so that we may access it directly in the GOT. */
5666 SYMBOL_REF_FLAG (symbol
) = local_p
;
5668 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5669 "local dynamic", "initial exec" or "local exec" TLS models
5672 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL (decl
))
5674 const char *symbol_str
;
5677 enum tls_model kind
= decl_tls_model (decl
);
5679 if (TARGET_64BIT
&& ! flag_pic
)
5681 /* x86-64 doesn't allow non-pic code for shared libraries,
5682 so don't generate GD/LD TLS models for non-pic code. */
5685 case TLS_MODEL_GLOBAL_DYNAMIC
:
5686 kind
= TLS_MODEL_INITIAL_EXEC
; break;
5687 case TLS_MODEL_LOCAL_DYNAMIC
:
5688 kind
= TLS_MODEL_LOCAL_EXEC
; break;
5694 symbol_str
= XSTR (symbol
, 0);
5696 if (symbol_str
[0] == '%')
5698 if (symbol_str
[1] == tls_model_chars
[kind
])
5702 len
= strlen (symbol_str
) + 1;
5703 newstr
= alloca (len
+ 2);
5706 newstr
[1] = tls_model_chars
[kind
];
5707 memcpy (newstr
+ 2, symbol_str
, len
);
5709 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
+ 2 - 1);
/* Undo the above when printing symbol names.

   NOTE(review): the body of this function was lost in the garbled
   extraction; restored from this file's GCC 3.x conventions ('%x' TLS
   prefix, '*' no-prefix marker) — confirm against upstream.  */

static const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  if (*str == '%')
    str += 2;
  if (*str == '*')
    str += 1;
  return str;
}
5726 /* Load the thread pointer into a register. */
5729 get_thread_pointer ()
5733 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
5734 tp
= gen_rtx_MEM (Pmode
, tp
);
5735 RTX_UNCHANGING_P (tp
) = 1;
5736 set_mem_alias_set (tp
, ix86_GOT_alias_set ());
5737 tp
= force_reg (Pmode
, tp
);
5742 /* Try machine-dependent ways of modifying an illegitimate address
5743 to be legitimate. If we find one, return the new, valid address.
5744 This macro is used in only one place: `memory_address' in explow.c.
5746 OLDX is the address as it was before break_out_memory_refs was called.
5747 In some cases it is useful to look at this to decide what needs to be done.
5749 MODE and WIN are passed so that this macro can use
5750 GO_IF_LEGITIMATE_ADDRESS.
5752 It is always safe for this macro to do nothing. It exists to recognize
5753 opportunities to optimize the output.
5755 For the 80386, we handle X+REG by loading X into a register R and
5756 using R+REG. R will go in a general reg and indexing will be used.
5757 However, if REG is a broken-out memory address or multiplication,
5758 nothing needs to be done because REG can certainly go in a general reg.
5760 When -fpic is used, special handling is needed for symbolic references.
5761 See comments by legitimize_pic_address in i386.c for details. */
5764 legitimize_address (x
, oldx
, mode
)
5766 register rtx oldx ATTRIBUTE_UNUSED
;
5767 enum machine_mode mode
;
5772 if (TARGET_DEBUG_ADDR
)
5774 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5775 GET_MODE_NAME (mode
));
5779 log
= tls_symbolic_operand (x
, mode
);
5782 rtx dest
, base
, off
, pic
;
5787 case TLS_MODEL_GLOBAL_DYNAMIC
:
5788 dest
= gen_reg_rtx (Pmode
);
5791 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
5794 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
5795 insns
= get_insns ();
5798 emit_libcall_block (insns
, dest
, rax
, x
);
5801 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
5804 case TLS_MODEL_LOCAL_DYNAMIC
:
5805 base
= gen_reg_rtx (Pmode
);
5808 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
5811 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
5812 insns
= get_insns ();
5815 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
5816 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
5817 emit_libcall_block (insns
, base
, rax
, note
);
5820 emit_insn (gen_tls_local_dynamic_base_32 (base
));
5822 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5823 off
= gen_rtx_CONST (Pmode
, off
);
5825 return gen_rtx_PLUS (Pmode
, base
, off
);
5827 case TLS_MODEL_INITIAL_EXEC
:
5831 type
= UNSPEC_GOTNTPOFF
;
5835 if (reload_in_progress
)
5836 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5837 pic
= pic_offset_table_rtx
;
5838 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
5840 else if (!TARGET_GNU_TLS
)
5842 pic
= gen_reg_rtx (Pmode
);
5843 emit_insn (gen_set_got (pic
));
5844 type
= UNSPEC_GOTTPOFF
;
5849 type
= UNSPEC_INDNTPOFF
;
5852 base
= get_thread_pointer ();
5854 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
5855 off
= gen_rtx_CONST (Pmode
, off
);
5857 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5858 off
= gen_rtx_MEM (Pmode
, off
);
5859 RTX_UNCHANGING_P (off
) = 1;
5860 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5861 dest
= gen_reg_rtx (Pmode
);
5863 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5865 emit_move_insn (dest
, off
);
5866 return gen_rtx_PLUS (Pmode
, base
, dest
);
5869 emit_insn (gen_subsi3 (dest
, base
, off
));
5872 case TLS_MODEL_LOCAL_EXEC
:
5873 base
= get_thread_pointer ();
5875 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5876 (TARGET_64BIT
|| TARGET_GNU_TLS
)
5877 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5878 off
= gen_rtx_CONST (Pmode
, off
);
5880 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5881 return gen_rtx_PLUS (Pmode
, base
, off
);
5884 dest
= gen_reg_rtx (Pmode
);
5885 emit_insn (gen_subsi3 (dest
, base
, off
));
5896 if (flag_pic
&& SYMBOLIC_CONST (x
))
5897 return legitimize_pic_address (x
, 0);
5899 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5900 if (GET_CODE (x
) == ASHIFT
5901 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5902 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5905 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5906 GEN_INT (1 << log
));
5909 if (GET_CODE (x
) == PLUS
)
5911 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5913 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5914 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5915 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5918 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5919 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5920 GEN_INT (1 << log
));
5923 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5924 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5925 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5928 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5929 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5930 GEN_INT (1 << log
));
5933 /* Put multiply first if it isn't already. */
5934 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5936 rtx tmp
= XEXP (x
, 0);
5937 XEXP (x
, 0) = XEXP (x
, 1);
5942 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5943 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5944 created by virtual register instantiation, register elimination, and
5945 similar optimizations. */
5946 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5949 x
= gen_rtx_PLUS (Pmode
,
5950 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5951 XEXP (XEXP (x
, 1), 0)),
5952 XEXP (XEXP (x
, 1), 1));
5956 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5957 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5958 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5959 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5960 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5961 && CONSTANT_P (XEXP (x
, 1)))
5964 rtx other
= NULL_RTX
;
5966 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5968 constant
= XEXP (x
, 1);
5969 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5971 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5973 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5974 other
= XEXP (x
, 1);
5982 x
= gen_rtx_PLUS (Pmode
,
5983 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5984 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5985 plus_constant (other
, INTVAL (constant
)));
5989 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5992 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5995 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5998 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6001 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6005 && GET_CODE (XEXP (x
, 1)) == REG
6006 && GET_CODE (XEXP (x
, 0)) == REG
)
6009 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6012 x
= legitimize_pic_address (x
, 0);
6015 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6018 if (GET_CODE (XEXP (x
, 0)) == REG
)
6020 register rtx temp
= gen_reg_rtx (Pmode
);
6021 register rtx val
= force_operand (XEXP (x
, 1), temp
);
6023 emit_move_insn (temp
, val
);
6029 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6031 register rtx temp
= gen_reg_rtx (Pmode
);
6032 register rtx val
= force_operand (XEXP (x
, 0), temp
);
6034 emit_move_insn (temp
, val
);
6044 /* Print an integer constant expression in assembler syntax. Addition
6045 and subtraction are the only arithmetic that may appear in these
6046 expressions. FILE is the stdio stream to write to, X is the rtx, and
6047 CODE is the operand print code from the output string. */
6050 output_pic_addr_const (file
, x
, code
)
6057 switch (GET_CODE (x
))
6067 assemble_name (file
, XSTR (x
, 0));
6068 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_FLAG (x
))
6069 fputs ("@PLT", file
);
6076 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6077 assemble_name (asm_out_file
, buf
);
6081 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6085 /* This used to output parentheses around the expression,
6086 but that does not work on the 386 (either ATT or BSD assembler). */
6087 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6091 if (GET_MODE (x
) == VOIDmode
)
6093 /* We can use %d if the number is <32 bits and positive. */
6094 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6095 fprintf (file
, "0x%lx%08lx",
6096 (unsigned long) CONST_DOUBLE_HIGH (x
),
6097 (unsigned long) CONST_DOUBLE_LOW (x
));
6099 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6102 /* We can't handle floating point constants;
6103 PRINT_OPERAND must handle them. */
6104 output_operand_lossage ("floating constant misused");
6108 /* Some assemblers need integer constants to appear first. */
6109 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6111 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6113 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6115 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6117 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6119 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6127 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6128 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6130 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6132 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
6136 if (XVECLEN (x
, 0) != 1)
6138 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6139 switch (XINT (x
, 1))
6142 fputs ("@GOT", file
);
6145 fputs ("@GOTOFF", file
);
6147 case UNSPEC_GOTPCREL
:
6148 fputs ("@GOTPCREL(%rip)", file
);
6150 case UNSPEC_GOTTPOFF
:
6151 /* FIXME: This might be @TPOFF in Sun ld too. */
6152 fputs ("@GOTTPOFF", file
);
6155 fputs ("@TPOFF", file
);
6159 fputs ("@TPOFF", file
);
6161 fputs ("@NTPOFF", file
);
6164 fputs ("@DTPOFF", file
);
6166 case UNSPEC_GOTNTPOFF
:
6168 fputs ("@GOTTPOFF(%rip)", file
);
6170 fputs ("@GOTNTPOFF", file
);
6172 case UNSPEC_INDNTPOFF
:
6173 fputs ("@INDNTPOFF", file
);
6176 output_operand_lossage ("invalid UNSPEC as operand");
6182 output_operand_lossage ("invalid expression as operand");
6186 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6187 We need to handle our special PIC relocations. */
6190 i386_dwarf_output_addr_const (file
, x
)
6195 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6199 fprintf (file
, "%s", ASM_LONG
);
6202 output_pic_addr_const (file
, x
, '\0');
6204 output_addr_const (file
, x
);
6208 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6209 We need to emit DTP-relative relocations. */
6212 i386_output_dwarf_dtprel (file
, size
, x
)
6217 fputs (ASM_LONG
, file
);
6218 output_addr_const (file
, x
);
6219 fputs ("@DTPOFF", file
);
6225 fputs (", 0", file
);
6232 /* In the name of slightly smaller debug output, and to cater to
6233 general assembler losage, recognize PIC+GOTOFF and turn it back
6234 into a direct symbol reference. */
6237 i386_simplify_dwarf_addr (orig_x
)
6242 if (GET_CODE (x
) == MEM
)
6247 if (GET_CODE (x
) != CONST
6248 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6249 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6250 || GET_CODE (orig_x
) != MEM
)
6252 return XVECEXP (XEXP (x
, 0), 0, 0);
6255 if (GET_CODE (x
) != PLUS
6256 || GET_CODE (XEXP (x
, 1)) != CONST
)
6259 if (GET_CODE (XEXP (x
, 0)) == REG
6260 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6261 /* %ebx + GOT/GOTOFF */
6263 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6265 /* %ebx + %reg * scale + GOT/GOTOFF */
6267 if (GET_CODE (XEXP (y
, 0)) == REG
6268 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6270 else if (GET_CODE (XEXP (y
, 1)) == REG
6271 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6275 if (GET_CODE (y
) != REG
6276 && GET_CODE (y
) != MULT
6277 && GET_CODE (y
) != ASHIFT
)
6283 x
= XEXP (XEXP (x
, 1), 0);
6284 if (GET_CODE (x
) == UNSPEC
6285 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6286 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6289 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6290 return XVECEXP (x
, 0, 0);
6293 if (GET_CODE (x
) == PLUS
6294 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6295 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6296 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6297 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6298 && GET_CODE (orig_x
) != MEM
)))
6300 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6302 return gen_rtx_PLUS (Pmode
, y
, x
);
6310 put_condition_code (code
, mode
, reverse
, fp
, file
)
6312 enum machine_mode mode
;
6318 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6320 enum rtx_code second_code
, bypass_code
;
6321 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6322 if (bypass_code
!= NIL
|| second_code
!= NIL
)
6324 code
= ix86_fp_compare_code_to_integer (code
);
6328 code
= reverse_condition (code
);
6339 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6344 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6345 Those same assemblers have the same but opposite losage on cmov. */
6348 suffix
= fp
? "nbe" : "a";
6351 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6353 else if (mode
== CCmode
|| mode
== CCGCmode
)
6364 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6366 else if (mode
== CCmode
|| mode
== CCGCmode
)
6375 suffix
= fp
? "nb" : "ae";
6378 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6388 suffix
= fp
? "u" : "p";
6391 suffix
= fp
? "nu" : "np";
6396 fputs (suffix
, file
);
6400 print_reg (x
, code
, file
)
6405 if (REGNO (x
) == ARG_POINTER_REGNUM
6406 || REGNO (x
) == FRAME_POINTER_REGNUM
6407 || REGNO (x
) == FLAGS_REG
6408 || REGNO (x
) == FPSR_REG
)
6411 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6414 if (code
== 'w' || MMX_REG_P (x
))
6416 else if (code
== 'b')
6418 else if (code
== 'k')
6420 else if (code
== 'q')
6422 else if (code
== 'y')
6424 else if (code
== 'h')
6427 code
= GET_MODE_SIZE (GET_MODE (x
));
6429 /* Irritatingly, AMD extended registers use different naming convention
6430 from the normal registers. */
6431 if (REX_INT_REG_P (x
))
6438 error ("extended registers have no high halves");
6441 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6444 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6447 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6450 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6453 error ("unsupported operand size for extended register");
6461 if (STACK_TOP_P (x
))
6463 fputs ("st(0)", file
);
6470 if (! ANY_FP_REG_P (x
))
6471 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6475 fputs (hi_reg_name
[REGNO (x
)], file
);
6478 fputs (qi_reg_name
[REGNO (x
)], file
);
6481 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6488 /* Locate some local-dynamic symbol still in use by this function
6489 so that we can print its name in some tls_local_dynamic_base
6493 get_some_local_dynamic_name ()
6497 if (cfun
->machine
->some_ld_name
)
6498 return cfun
->machine
->some_ld_name
;
6500 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6502 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6503 return cfun
->machine
->some_ld_name
;
6509 get_some_local_dynamic_name_1 (px
, data
)
6511 void *data ATTRIBUTE_UNUSED
;
6515 if (GET_CODE (x
) == SYMBOL_REF
6516 && local_dynamic_symbolic_operand (x
, Pmode
))
6518 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6526 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6527 C -- print opcode suffix for set/cmov insn.
6528 c -- like C, but print reversed condition
6529 F,f -- likewise, but for floating-point.
6530 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6532 R -- print the prefix for register names.
6533 z -- print the opcode suffix for the size of the current operand.
6534 * -- print a star (in certain assembler syntax)
6535 A -- print an absolute memory reference.
6536 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6537 s -- print a shift double count, followed by the assemblers argument
6539 b -- print the QImode name of the register for the indicated operand.
6540 %b0 would print %al if operands[0] is reg 0.
6541 w -- likewise, print the HImode name of the register.
6542 k -- likewise, print the SImode name of the register.
6543 q -- likewise, print the DImode name of the register.
6544 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6545 y -- print "st(0)" instead of "st" as a register.
6546 D -- print condition for SSE cmp instruction.
6547 P -- if PIC, print an @PLT suffix.
6548 X -- don't print any sort of PIC '@' suffix for a symbol.
6549 & -- print some in-use local-dynamic symbol name.
6553 print_operand (file
, x
, code
)
6563 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6568 assemble_name (file
, get_some_local_dynamic_name ());
6572 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6574 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6576 /* Intel syntax. For absolute addresses, registers should not
6577 be surrounded by braces. */
6578 if (GET_CODE (x
) != REG
)
6581 PRINT_OPERAND (file
, x
, 0);
6589 PRINT_OPERAND (file
, x
, 0);
6594 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6599 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6604 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6609 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6614 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6619 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6624 /* 387 opcodes don't get size suffixes if the operands are
6626 if (STACK_REG_P (x
))
6629 /* Likewise if using Intel opcodes. */
6630 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6633 /* This is the size of op from size of operand. */
6634 switch (GET_MODE_SIZE (GET_MODE (x
)))
6637 #ifdef HAVE_GAS_FILDS_FISTS
6643 if (GET_MODE (x
) == SFmode
)
6658 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6660 #ifdef GAS_MNEMONICS
6686 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6688 PRINT_OPERAND (file
, x
, 0);
6694 /* Little bit of braindamage here. The SSE compare instructions
6695 does use completely different names for the comparisons that the
6696 fp conditional moves. */
6697 switch (GET_CODE (x
))
6712 fputs ("unord", file
);
6716 fputs ("neq", file
);
6720 fputs ("nlt", file
);
6724 fputs ("nle", file
);
6727 fputs ("ord", file
);
6735 #ifdef CMOV_SUN_AS_SYNTAX
6736 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6738 switch (GET_MODE (x
))
6740 case HImode
: putc ('w', file
); break;
6742 case SFmode
: putc ('l', file
); break;
6744 case DFmode
: putc ('q', file
); break;
6752 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6755 #ifdef CMOV_SUN_AS_SYNTAX
6756 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6759 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6762 /* Like above, but reverse condition */
6764 /* Check to see if argument to %c is really a constant
6765 and not a condition code which needs to be reversed. */
6766 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
6768 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6771 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6774 #ifdef CMOV_SUN_AS_SYNTAX
6775 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6778 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6784 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6787 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6790 int pred_val
= INTVAL (XEXP (x
, 0));
6792 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6793 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6795 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6796 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6798 /* Emit hints only in the case default branch prediction
6799 heruistics would fail. */
6800 if (taken
!= cputaken
)
6802 /* We use 3e (DS) prefix for taken branches and
6803 2e (CS) prefix for not taken branches. */
6805 fputs ("ds ; ", file
);
6807 fputs ("cs ; ", file
);
6814 output_operand_lossage ("invalid operand code `%c'", code
);
6818 if (GET_CODE (x
) == REG
)
6820 PRINT_REG (x
, code
, file
);
6823 else if (GET_CODE (x
) == MEM
)
6825 /* No `byte ptr' prefix for call instructions. */
6826 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6829 switch (GET_MODE_SIZE (GET_MODE (x
)))
6831 case 1: size
= "BYTE"; break;
6832 case 2: size
= "WORD"; break;
6833 case 4: size
= "DWORD"; break;
6834 case 8: size
= "QWORD"; break;
6835 case 12: size
= "XWORD"; break;
6836 case 16: size
= "XMMWORD"; break;
6841 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6844 else if (code
== 'w')
6846 else if (code
== 'k')
6850 fputs (" PTR ", file
);
6854 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
6855 output_pic_addr_const (file
, x
, code
);
6856 /* Avoid (%rip) for call operands. */
6857 else if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6858 && GET_CODE (x
) != CONST_INT
)
6859 output_addr_const (file
, x
);
6860 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6861 output_operand_lossage ("invalid constraints for operand");
6866 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6871 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6872 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6874 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6876 fprintf (file
, "0x%lx", l
);
6879 /* These float cases don't actually occur as immediate operands. */
6880 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6884 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6885 fprintf (file
, "%s", dstr
);
6888 else if (GET_CODE (x
) == CONST_DOUBLE
6889 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
6893 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6894 fprintf (file
, "%s", dstr
);
6901 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6903 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6906 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6907 || GET_CODE (x
) == LABEL_REF
)
6909 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6912 fputs ("OFFSET FLAT:", file
);
6915 if (GET_CODE (x
) == CONST_INT
)
6916 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6918 output_pic_addr_const (file
, x
, code
);
6920 output_addr_const (file
, x
);
6924 /* Print a memory operand whose address is ADDR. */
6927 print_operand_address (file
, addr
)
6931 struct ix86_address parts
;
6932 rtx base
, index
, disp
;
6935 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
6937 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6938 fputs ("DWORD PTR ", file
);
6939 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6942 fputs ("fs:0", file
);
6944 fputs ("gs:0", file
);
6948 if (! ix86_decompose_address (addr
, &parts
))
6952 index
= parts
.index
;
6954 scale
= parts
.scale
;
6956 if (!base
&& !index
)
6958 /* Displacement only requires special attention. */
6960 if (GET_CODE (disp
) == CONST_INT
)
6962 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6964 if (USER_LABEL_PREFIX
[0] == 0)
6966 fputs ("ds:", file
);
6968 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
6971 output_pic_addr_const (file
, addr
, 0);
6973 output_addr_const (file
, addr
);
6975 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6977 && ((GET_CODE (addr
) == SYMBOL_REF
6978 && ! tls_symbolic_operand (addr
, GET_MODE (addr
)))
6979 || GET_CODE (addr
) == LABEL_REF
6980 || (GET_CODE (addr
) == CONST
6981 && GET_CODE (XEXP (addr
, 0)) == PLUS
6982 && (GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
6983 || GET_CODE (XEXP (XEXP (addr
, 0), 0)) == LABEL_REF
)
6984 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)))
6985 fputs ("(%rip)", file
);
6989 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6994 output_pic_addr_const (file
, disp
, 0);
6995 else if (GET_CODE (disp
) == LABEL_REF
)
6996 output_asm_label (disp
);
6998 output_addr_const (file
, disp
);
7003 PRINT_REG (base
, 0, file
);
7007 PRINT_REG (index
, 0, file
);
7009 fprintf (file
, ",%d", scale
);
7015 rtx offset
= NULL_RTX
;
7019 /* Pull out the offset of a symbol; print any symbol itself. */
7020 if (GET_CODE (disp
) == CONST
7021 && GET_CODE (XEXP (disp
, 0)) == PLUS
7022 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7024 offset
= XEXP (XEXP (disp
, 0), 1);
7025 disp
= gen_rtx_CONST (VOIDmode
,
7026 XEXP (XEXP (disp
, 0), 0));
7030 output_pic_addr_const (file
, disp
, 0);
7031 else if (GET_CODE (disp
) == LABEL_REF
)
7032 output_asm_label (disp
);
7033 else if (GET_CODE (disp
) == CONST_INT
)
7036 output_addr_const (file
, disp
);
7042 PRINT_REG (base
, 0, file
);
7045 if (INTVAL (offset
) >= 0)
7047 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7051 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7058 PRINT_REG (index
, 0, file
);
7060 fprintf (file
, "*%d", scale
);
7068 output_addr_const_extra (file
, x
)
7074 if (GET_CODE (x
) != UNSPEC
)
7077 op
= XVECEXP (x
, 0, 0);
7078 switch (XINT (x
, 1))
7080 case UNSPEC_GOTTPOFF
:
7081 output_addr_const (file
, op
);
7082 /* FIXME: This might be @TPOFF in Sun ld. */
7083 fputs ("@GOTTPOFF", file
);
7086 output_addr_const (file
, op
);
7087 fputs ("@TPOFF", file
);
7090 output_addr_const (file
, op
);
7092 fputs ("@TPOFF", file
);
7094 fputs ("@NTPOFF", file
);
7097 output_addr_const (file
, op
);
7098 fputs ("@DTPOFF", file
);
7100 case UNSPEC_GOTNTPOFF
:
7101 output_addr_const (file
, op
);
7103 fputs ("@GOTTPOFF(%rip)", file
);
7105 fputs ("@GOTNTPOFF", file
);
7107 case UNSPEC_INDNTPOFF
:
7108 output_addr_const (file
, op
);
7109 fputs ("@INDNTPOFF", file
);
7119 /* Split one or more DImode RTL references into pairs of SImode
7120 references. The RTL can be REG, offsettable MEM, integer constant, or
7121 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7122 split and "num" is its length. lo_half and hi_half are output arrays
7123 that parallel "operands". */
7126 split_di (operands
, num
, lo_half
, hi_half
)
7129 rtx lo_half
[], hi_half
[];
7133 rtx op
= operands
[num
];
7135 /* simplify_subreg refuse to split volatile memory addresses,
7136 but we still have to handle it. */
7137 if (GET_CODE (op
) == MEM
)
7139 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7140 hi_half
[num
] = adjust_address (op
, SImode
, 4);
7144 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7145 GET_MODE (op
) == VOIDmode
7146 ? DImode
: GET_MODE (op
), 0);
7147 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7148 GET_MODE (op
) == VOIDmode
7149 ? DImode
: GET_MODE (op
), 4);
7153 /* Split one or more TImode RTL references into pairs of SImode
7154 references. The RTL can be REG, offsettable MEM, integer constant, or
7155 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7156 split and "num" is its length. lo_half and hi_half are output arrays
7157 that parallel "operands". */
7160 split_ti (operands
, num
, lo_half
, hi_half
)
7163 rtx lo_half
[], hi_half
[];
7167 rtx op
= operands
[num
];
7169 /* simplify_subreg refuse to split volatile memory addresses, but we
7170 still have to handle it. */
7171 if (GET_CODE (op
) == MEM
)
7173 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7174 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7178 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7179 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7184 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7185 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7186 is the expression of the binary operation. The output may either be
7187 emitted here, or returned to the caller, like all output_* functions.
7189 There is no guarantee that the operands are the same mode, as they
7190 might be within FLOAT or FLOAT_EXTEND expressions. */
7192 #ifndef SYSV386_COMPAT
7193 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7194 wants to fix the assemblers because that causes incompatibility
7195 with gcc. No-one wants to fix gcc because that causes
7196 incompatibility with assemblers... You can use the option of
7197 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7198 #define SYSV386_COMPAT 1
7202 output_387_binary_op (insn
, operands
)
7206 static char buf
[30];
7209 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7211 #ifdef ENABLE_CHECKING
7212 /* Even if we do not want to check the inputs, this documents input
7213 constraints. Which helps in understanding the following code. */
7214 if (STACK_REG_P (operands
[0])
7215 && ((REG_P (operands
[1])
7216 && REGNO (operands
[0]) == REGNO (operands
[1])
7217 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7218 || (REG_P (operands
[2])
7219 && REGNO (operands
[0]) == REGNO (operands
[2])
7220 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7221 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7227 switch (GET_CODE (operands
[3]))
7230 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7231 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7239 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7240 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7248 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7249 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7257 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7258 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7272 if (GET_MODE (operands
[0]) == SFmode
)
7273 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7275 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7280 switch (GET_CODE (operands
[3]))
7284 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7286 rtx temp
= operands
[2];
7287 operands
[2] = operands
[1];
7291 /* know operands[0] == operands[1]. */
7293 if (GET_CODE (operands
[2]) == MEM
)
7299 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7301 if (STACK_TOP_P (operands
[0]))
7302 /* How is it that we are storing to a dead operand[2]?
7303 Well, presumably operands[1] is dead too. We can't
7304 store the result to st(0) as st(0) gets popped on this
7305 instruction. Instead store to operands[2] (which I
7306 think has to be st(1)). st(1) will be popped later.
7307 gcc <= 2.8.1 didn't have this check and generated
7308 assembly code that the Unixware assembler rejected. */
7309 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7311 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7315 if (STACK_TOP_P (operands
[0]))
7316 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7318 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7323 if (GET_CODE (operands
[1]) == MEM
)
7329 if (GET_CODE (operands
[2]) == MEM
)
7335 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7338 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7339 derived assemblers, confusingly reverse the direction of
7340 the operation for fsub{r} and fdiv{r} when the
7341 destination register is not st(0). The Intel assembler
7342 doesn't have this brain damage. Read !SYSV386_COMPAT to
7343 figure out what the hardware really does. */
7344 if (STACK_TOP_P (operands
[0]))
7345 p
= "{p\t%0, %2|rp\t%2, %0}";
7347 p
= "{rp\t%2, %0|p\t%0, %2}";
7349 if (STACK_TOP_P (operands
[0]))
7350 /* As above for fmul/fadd, we can't store to st(0). */
7351 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7353 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7358 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7361 if (STACK_TOP_P (operands
[0]))
7362 p
= "{rp\t%0, %1|p\t%1, %0}";
7364 p
= "{p\t%1, %0|rp\t%0, %1}";
7366 if (STACK_TOP_P (operands
[0]))
7367 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7369 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7374 if (STACK_TOP_P (operands
[0]))
7376 if (STACK_TOP_P (operands
[1]))
7377 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7379 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7382 else if (STACK_TOP_P (operands
[1]))
7385 p
= "{\t%1, %0|r\t%0, %1}";
7387 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7393 p
= "{r\t%2, %0|\t%0, %2}";
7395 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7408 /* Output code to initialize control word copies used by
7409 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7410 is set to control word rounding downwards. */
7412 emit_i387_cw_initialization (normal
, round_down
)
7413 rtx normal
, round_down
;
7415 rtx reg
= gen_reg_rtx (HImode
);
7417 emit_insn (gen_x86_fnstcw_1 (normal
));
7418 emit_move_insn (reg
, normal
);
7419 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7421 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7423 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
7424 emit_move_insn (round_down
, reg
);
7427 /* Output code for INSN to convert a float to a signed int. OPERANDS
7428 are the insn operands. The output may be [HSD]Imode and the input
7429 operand may be [SDX]Fmode. */
7432 output_fix_trunc (insn
, operands
)
7436 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7437 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7439 /* Jump through a hoop or two for DImode, since the hardware has no
7440 non-popping instruction. We used to do this a different way, but
7441 that was somewhat fragile and broke with post-reload splitters. */
7442 if (dimode_p
&& !stack_top_dies
)
7443 output_asm_insn ("fld\t%y1", operands
);
7445 if (!STACK_TOP_P (operands
[1]))
7448 if (GET_CODE (operands
[0]) != MEM
)
7451 output_asm_insn ("fldcw\t%3", operands
);
7452 if (stack_top_dies
|| dimode_p
)
7453 output_asm_insn ("fistp%z0\t%0", operands
);
7455 output_asm_insn ("fist%z0\t%0", operands
);
7456 output_asm_insn ("fldcw\t%2", operands
);
7461 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7462 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7463 when fucom should be used. */
7466 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
7469 int eflags_p
, unordered_p
;
7472 rtx cmp_op0
= operands
[0];
7473 rtx cmp_op1
= operands
[1];
7474 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7479 cmp_op1
= operands
[2];
7483 if (GET_MODE (operands
[0]) == SFmode
)
7485 return "ucomiss\t{%1, %0|%0, %1}";
7487 return "comiss\t{%1, %0|%0, %y}";
7490 return "ucomisd\t{%1, %0|%0, %1}";
7492 return "comisd\t{%1, %0|%0, %y}";
7495 if (! STACK_TOP_P (cmp_op0
))
7498 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7500 if (STACK_REG_P (cmp_op1
)
7502 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7503 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7505 /* If both the top of the 387 stack dies, and the other operand
7506 is also a stack register that dies, then this must be a
7507 `fcompp' float compare */
7511 /* There is no double popping fcomi variant. Fortunately,
7512 eflags is immune from the fstp's cc clobbering. */
7514 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7516 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7524 return "fucompp\n\tfnstsw\t%0";
7526 return "fcompp\n\tfnstsw\t%0";
7539 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7541 static const char * const alt
[24] =
7553 "fcomi\t{%y1, %0|%0, %y1}",
7554 "fcomip\t{%y1, %0|%0, %y1}",
7555 "fucomi\t{%y1, %0|%0, %y1}",
7556 "fucomip\t{%y1, %0|%0, %y1}",
7563 "fcom%z2\t%y2\n\tfnstsw\t%0",
7564 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7565 "fucom%z2\t%y2\n\tfnstsw\t%0",
7566 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7568 "ficom%z2\t%y2\n\tfnstsw\t%0",
7569 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7577 mask
= eflags_p
<< 3;
7578 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7579 mask
|= unordered_p
<< 1;
7580 mask
|= stack_top_dies
;
7593 ix86_output_addr_vec_elt (file
, value
)
7597 const char *directive
= ASM_LONG
;
7602 directive
= ASM_QUAD
;
7608 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7612 ix86_output_addr_diff_elt (file
, value
, rel
)
7617 fprintf (file
, "%s%s%d-%s%d\n",
7618 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7619 else if (HAVE_AS_GOTOFF_IN_DATA
)
7620 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7622 else if (TARGET_MACHO
)
7623 fprintf (file
, "%s%s%d-%s\n", ASM_LONG
, LPREFIX
, value
,
7624 machopic_function_base_name () + 1);
7627 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7628 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7631 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7635 ix86_expand_clear (dest
)
7640 /* We play register width games, which are only valid after reload. */
7641 if (!reload_completed
)
7644 /* Avoid HImode and its attendant prefix byte. */
7645 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7646 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7648 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7650 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7651 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7653 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7654 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7660 /* X is an unchanging MEM. If it is a constant pool reference, return
7661 the constant pool rtx, else NULL. */
7664 maybe_get_pool_constant (x
)
7669 if (flag_pic
&& ! TARGET_64BIT
)
7671 if (GET_CODE (x
) != PLUS
)
7673 if (XEXP (x
, 0) != pic_offset_table_rtx
)
7676 if (GET_CODE (x
) != CONST
)
7679 if (GET_CODE (x
) != UNSPEC
)
7681 if (XINT (x
, 1) != UNSPEC_GOTOFF
)
7683 x
= XVECEXP (x
, 0, 0);
7686 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7687 return get_pool_constant (x
);
7693 ix86_expand_move (mode
, operands
)
7694 enum machine_mode mode
;
7697 int strict
= (reload_in_progress
|| reload_completed
);
7698 rtx insn
, op0
, op1
, tmp
;
7703 if (tls_symbolic_operand (op1
, Pmode
))
7705 op1
= legitimize_address (op1
, op1
, VOIDmode
);
7706 if (GET_CODE (op0
) == MEM
)
7708 tmp
= gen_reg_rtx (mode
);
7709 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, op1
));
7713 else if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7718 rtx temp
= ((reload_in_progress
7719 || ((op0
&& GET_CODE (op0
) == REG
)
7721 ? op0
: gen_reg_rtx (Pmode
));
7722 op1
= machopic_indirect_data_reference (op1
, temp
);
7723 op1
= machopic_legitimize_pic_address (op1
, mode
,
7724 temp
== op1
? 0 : temp
);
7728 if (MACHOPIC_INDIRECT
)
7729 op1
= machopic_indirect_data_reference (op1
, 0);
7733 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7737 #endif /* TARGET_MACHO */
7738 if (GET_CODE (op0
) == MEM
)
7739 op1
= force_reg (Pmode
, op1
);
7743 if (GET_CODE (temp
) != REG
)
7744 temp
= gen_reg_rtx (Pmode
);
7745 temp
= legitimize_pic_address (op1
, temp
);
7753 if (GET_CODE (op0
) == MEM
7754 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7755 || !push_operand (op0
, mode
))
7756 && GET_CODE (op1
) == MEM
)
7757 op1
= force_reg (mode
, op1
);
7759 if (push_operand (op0
, mode
)
7760 && ! general_no_elim_operand (op1
, mode
))
7761 op1
= copy_to_mode_reg (mode
, op1
);
7763 /* Force large constants in 64bit compilation into register
7764 to get them CSEed. */
7765 if (TARGET_64BIT
&& mode
== DImode
7766 && immediate_operand (op1
, mode
)
7767 && !x86_64_zero_extended_value (op1
)
7768 && !register_operand (op0
, mode
)
7769 && optimize
&& !reload_completed
&& !reload_in_progress
)
7770 op1
= copy_to_mode_reg (mode
, op1
);
7772 if (FLOAT_MODE_P (mode
))
7774 /* If we are loading a floating point constant to a register,
7775 force the value to memory now, since we'll get better code
7776 out the back end. */
7780 else if (GET_CODE (op1
) == CONST_DOUBLE
7781 && register_operand (op0
, mode
))
7782 op1
= validize_mem (force_const_mem (mode
, op1
));
7786 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7792 ix86_expand_vector_move (mode
, operands
)
7793 enum machine_mode mode
;
7796 /* Force constants other than zero into memory. We do not know how
7797 the instructions used to build constants modify the upper 64 bits
7798 of the register, once we have that information we may be able
7799 to handle some of them more efficiently. */
7800 if ((reload_in_progress
| reload_completed
) == 0
7801 && register_operand (operands
[0], mode
)
7802 && CONSTANT_P (operands
[1]))
7803 operands
[1] = force_const_mem (mode
, operands
[1]);
7805 /* Make operand1 a register if it isn't already. */
7807 && !register_operand (operands
[0], mode
)
7808 && !register_operand (operands
[1], mode
))
7810 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7811 emit_move_insn (operands
[0], temp
);
7815 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
7818 /* Attempt to expand a binary operator. Make the expansion closer to the
7819 actual machine, then just general_operand, which will allow 3 separate
7820 memory references (one output, two input) in a single insn. */
7823 ix86_expand_binary_operator (code
, mode
, operands
)
7825 enum machine_mode mode
;
7828 int matching_memory
;
7829 rtx src1
, src2
, dst
, op
, clob
;
7835 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7836 if (GET_RTX_CLASS (code
) == 'c'
7837 && (rtx_equal_p (dst
, src2
)
7838 || immediate_operand (src1
, mode
)))
7845 /* If the destination is memory, and we do not have matching source
7846 operands, do things in registers. */
7847 matching_memory
= 0;
7848 if (GET_CODE (dst
) == MEM
)
7850 if (rtx_equal_p (dst
, src1
))
7851 matching_memory
= 1;
7852 else if (GET_RTX_CLASS (code
) == 'c'
7853 && rtx_equal_p (dst
, src2
))
7854 matching_memory
= 2;
7856 dst
= gen_reg_rtx (mode
);
7859 /* Both source operands cannot be in memory. */
7860 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7862 if (matching_memory
!= 2)
7863 src2
= force_reg (mode
, src2
);
7865 src1
= force_reg (mode
, src1
);
7868 /* If the operation is not commutable, source 1 cannot be a constant
7869 or non-matching memory. */
7870 if ((CONSTANT_P (src1
)
7871 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7872 && GET_RTX_CLASS (code
) != 'c')
7873 src1
= force_reg (mode
, src1
);
7875 /* If optimizing, copy to regs to improve CSE */
7876 if (optimize
&& ! no_new_pseudos
)
7878 if (GET_CODE (dst
) == MEM
)
7879 dst
= gen_reg_rtx (mode
);
7880 if (GET_CODE (src1
) == MEM
)
7881 src1
= force_reg (mode
, src1
);
7882 if (GET_CODE (src2
) == MEM
)
7883 src2
= force_reg (mode
, src2
);
7886 /* Emit the instruction. */
7888 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7889 if (reload_in_progress
)
7891 /* Reload doesn't know about the flags register, and doesn't know that
7892 it doesn't want to clobber it. We can only do this with PLUS. */
7899 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7900 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7903 /* Fix up the destination if needed. */
7904 if (dst
!= operands
[0])
7905 emit_move_insn (operands
[0], dst
);
7908 /* Return TRUE or FALSE depending on whether the binary operator meets the
7909 appropriate constraints. */
7912 ix86_binary_operator_ok (code
, mode
, operands
)
7914 enum machine_mode mode ATTRIBUTE_UNUSED
;
7917 /* Both source operands cannot be in memory. */
7918 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7920 /* If the operation is not commutable, source 1 cannot be a constant. */
7921 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
7923 /* If the destination is memory, we must have a matching source operand. */
7924 if (GET_CODE (operands
[0]) == MEM
7925 && ! (rtx_equal_p (operands
[0], operands
[1])
7926 || (GET_RTX_CLASS (code
) == 'c'
7927 && rtx_equal_p (operands
[0], operands
[2]))))
7929 /* If the operation is not commutable and the source 1 is memory, we must
7930 have a matching destination. */
7931 if (GET_CODE (operands
[1]) == MEM
7932 && GET_RTX_CLASS (code
) != 'c'
7933 && ! rtx_equal_p (operands
[0], operands
[1]))
7938 /* Attempt to expand a unary operator. Make the expansion closer to the
7939 actual machine, then just general_operand, which will allow 2 separate
7940 memory references (one output, one input) in a single insn. */
7943 ix86_expand_unary_operator (code
, mode
, operands
)
7945 enum machine_mode mode
;
7948 int matching_memory
;
7949 rtx src
, dst
, op
, clob
;
7954 /* If the destination is memory, and we do not have matching source
7955 operands, do things in registers. */
7956 matching_memory
= 0;
7957 if (GET_CODE (dst
) == MEM
)
7959 if (rtx_equal_p (dst
, src
))
7960 matching_memory
= 1;
7962 dst
= gen_reg_rtx (mode
);
7965 /* When source operand is memory, destination must match. */
7966 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7967 src
= force_reg (mode
, src
);
7969 /* If optimizing, copy to regs to improve CSE */
7970 if (optimize
&& ! no_new_pseudos
)
7972 if (GET_CODE (dst
) == MEM
)
7973 dst
= gen_reg_rtx (mode
);
7974 if (GET_CODE (src
) == MEM
)
7975 src
= force_reg (mode
, src
);
7978 /* Emit the instruction. */
7980 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7981 if (reload_in_progress
|| code
== NOT
)
7983 /* Reload doesn't know about the flags register, and doesn't know that
7984 it doesn't want to clobber it. */
7991 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7992 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7995 /* Fix up the destination if needed. */
7996 if (dst
!= operands
[0])
7997 emit_move_insn (operands
[0], dst
);
8000 /* Return TRUE or FALSE depending on whether the unary operator meets the
8001 appropriate constraints. */
8004 ix86_unary_operator_ok (code
, mode
, operands
)
8005 enum rtx_code code ATTRIBUTE_UNUSED
;
8006 enum machine_mode mode ATTRIBUTE_UNUSED
;
8007 rtx operands
[2] ATTRIBUTE_UNUSED
;
8009 /* If one of operands is memory, source and destination must match. */
8010 if ((GET_CODE (operands
[0]) == MEM
8011 || GET_CODE (operands
[1]) == MEM
)
8012 && ! rtx_equal_p (operands
[0], operands
[1]))
8017 /* Return TRUE or FALSE depending on whether the first SET in INSN
8018 has source and destination with matching CC modes, and that the
8019 CC mode is at least as constrained as REQ_MODE. */
8022 ix86_match_ccmode (insn
, req_mode
)
8024 enum machine_mode req_mode
;
8027 enum machine_mode set_mode
;
8029 set
= PATTERN (insn
);
8030 if (GET_CODE (set
) == PARALLEL
)
8031 set
= XVECEXP (set
, 0, 0);
8032 if (GET_CODE (set
) != SET
)
8034 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8037 set_mode
= GET_MODE (SET_DEST (set
));
8041 if (req_mode
!= CCNOmode
8042 && (req_mode
!= CCmode
8043 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8047 if (req_mode
== CCGCmode
)
8051 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8055 if (req_mode
== CCZmode
)
8065 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8068 /* Generate insn patterns to do an integer compare of OPERANDS. */
8071 ix86_expand_int_compare (code
, op0
, op1
)
8075 enum machine_mode cmpmode
;
8078 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8079 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8081 /* This is very simple, but making the interface the same as in the
8082 FP case makes the rest of the code easier. */
8083 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8084 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8086 /* Return the test that should be put into the flags user, i.e.
8087 the bcc, scc, or cmov instruction. */
8088 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8091 /* Figure out whether to use ordered or unordered fp comparisons.
8092 Return the appropriate mode to use. */
8095 ix86_fp_compare_mode (code
)
8096 enum rtx_code code ATTRIBUTE_UNUSED
;
8098 /* ??? In order to make all comparisons reversible, we do all comparisons
8099 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8100 all forms trapping and nontrapping comparisons, we can make inequality
8101 comparisons trapping again, since it results in better code when using
8102 FCOM based compares. */
8103 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8107 ix86_cc_mode (code
, op0
, op1
)
8111 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8112 return ix86_fp_compare_mode (code
);
8115 /* Only zero flag is needed. */
8117 case NE
: /* ZF!=0 */
8119 /* Codes needing carry flag. */
8120 case GEU
: /* CF=0 */
8121 case GTU
: /* CF=0 & ZF=0 */
8122 case LTU
: /* CF=1 */
8123 case LEU
: /* CF=1 | ZF=1 */
8125 /* Codes possibly doable only with sign flag when
8126 comparing against zero. */
8127 case GE
: /* SF=OF or SF=0 */
8128 case LT
: /* SF<>OF or SF=1 */
8129 if (op1
== const0_rtx
)
8132 /* For other cases Carry flag is not required. */
8134 /* Codes doable only with sign flag when comparing
8135 against zero, but we miss jump instruction for it
8136 so we need to use relational tests agains overflow
8137 that thus needs to be zero. */
8138 case GT
: /* ZF=0 & SF=OF */
8139 case LE
: /* ZF=1 | SF<>OF */
8140 if (op1
== const0_rtx
)
8144 /* strcmp pattern do (use flags) and combine may ask us for proper
8153 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8156 ix86_use_fcomi_compare (code
)
8157 enum rtx_code code ATTRIBUTE_UNUSED
;
8159 enum rtx_code swapped_code
= swap_condition (code
);
8160 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8161 || (ix86_fp_comparison_cost (swapped_code
)
8162 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8165 /* Swap, force into registers, or otherwise massage the two operands
8166 to a fp comparison. The operands are updated in place; the new
8167 comparsion code is returned. */
8169 static enum rtx_code
8170 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
8174 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8175 rtx op0
= *pop0
, op1
= *pop1
;
8176 enum machine_mode op_mode
= GET_MODE (op0
);
8177 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8179 /* All of the unordered compare instructions only work on registers.
8180 The same is true of the XFmode compare instructions. The same is
8181 true of the fcomi compare instructions. */
8184 && (fpcmp_mode
== CCFPUmode
8185 || op_mode
== XFmode
8186 || op_mode
== TFmode
8187 || ix86_use_fcomi_compare (code
)))
8189 op0
= force_reg (op_mode
, op0
);
8190 op1
= force_reg (op_mode
, op1
);
8194 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8195 things around if they appear profitable, otherwise force op0
8198 if (standard_80387_constant_p (op0
) == 0
8199 || (GET_CODE (op0
) == MEM
8200 && ! (standard_80387_constant_p (op1
) == 0
8201 || GET_CODE (op1
) == MEM
)))
8204 tmp
= op0
, op0
= op1
, op1
= tmp
;
8205 code
= swap_condition (code
);
8208 if (GET_CODE (op0
) != REG
)
8209 op0
= force_reg (op_mode
, op0
);
8211 if (CONSTANT_P (op1
))
8213 if (standard_80387_constant_p (op1
))
8214 op1
= force_reg (op_mode
, op1
);
8216 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8220 /* Try to rearrange the comparison to make it cheaper. */
8221 if (ix86_fp_comparison_cost (code
)
8222 > ix86_fp_comparison_cost (swap_condition (code
))
8223 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8226 tmp
= op0
, op0
= op1
, op1
= tmp
;
8227 code
= swap_condition (code
);
8228 if (GET_CODE (op0
) != REG
)
8229 op0
= force_reg (op_mode
, op0
);
8237 /* Convert comparison codes we use to represent FP comparison to integer
8238 code that will result in proper branch. Return UNKNOWN if no such code
8240 static enum rtx_code
8241 ix86_fp_compare_code_to_integer (code
)
8271 /* Split comparison code CODE into comparisons we can do using branch
8272 instructions. BYPASS_CODE is comparison code for branch that will
8273 branch around FIRST_CODE and SECOND_CODE. If some of branches
8274 is not required, set value to NIL.
8275 We never require more than two branches. */
8277 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
8278 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
8284 /* The fcomi comparison sets flags as follows:
8294 case GT
: /* GTU - CF=0 & ZF=0 */
8295 case GE
: /* GEU - CF=0 */
8296 case ORDERED
: /* PF=0 */
8297 case UNORDERED
: /* PF=1 */
8298 case UNEQ
: /* EQ - ZF=1 */
8299 case UNLT
: /* LTU - CF=1 */
8300 case UNLE
: /* LEU - CF=1 | ZF=1 */
8301 case LTGT
: /* EQ - ZF=0 */
8303 case LT
: /* LTU - CF=1 - fails on unordered */
8305 *bypass_code
= UNORDERED
;
8307 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8309 *bypass_code
= UNORDERED
;
8311 case EQ
: /* EQ - ZF=1 - fails on unordered */
8313 *bypass_code
= UNORDERED
;
8315 case NE
: /* NE - ZF=0 - fails on unordered */
8317 *second_code
= UNORDERED
;
8319 case UNGE
: /* GEU - CF=0 - fails on unordered */
8321 *second_code
= UNORDERED
;
8323 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8325 *second_code
= UNORDERED
;
8330 if (!TARGET_IEEE_FP
)
8337 /* Return cost of comparison done fcom + arithmetics operations on AX.
8338 All following functions do use number of instructions as an cost metrics.
8339 In future this should be tweaked to compute bytes for optimize_size and
8340 take into account performance of various instructions on various CPUs. */
8342 ix86_fp_comparison_arithmetics_cost (code
)
8345 if (!TARGET_IEEE_FP
)
8347 /* The cost of code output by ix86_expand_fp_compare. */
8375 /* Return cost of comparison done using fcomi operation.
8376 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8378 ix86_fp_comparison_fcomi_cost (code
)
8381 enum rtx_code bypass_code
, first_code
, second_code
;
8382 /* Return arbitarily high cost when instruction is not supported - this
8383 prevents gcc from using it. */
8386 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8387 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8390 /* Return cost of comparison done using sahf operation.
8391 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8393 ix86_fp_comparison_sahf_cost (code
)
8396 enum rtx_code bypass_code
, first_code
, second_code
;
8397 /* Return arbitarily high cost when instruction is not preferred - this
8398 avoids gcc from using it. */
8399 if (!TARGET_USE_SAHF
&& !optimize_size
)
8401 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8402 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8405 /* Compute cost of the comparison done using any method.
8406 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8408 ix86_fp_comparison_cost (code
)
8411 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8414 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8415 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8417 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8418 if (min
> sahf_cost
)
8420 if (min
> fcomi_cost
)
8425 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8428 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
8430 rtx op0
, op1
, scratch
;
8434 enum machine_mode fpcmp_mode
, intcmp_mode
;
8436 int cost
= ix86_fp_comparison_cost (code
);
8437 enum rtx_code bypass_code
, first_code
, second_code
;
8439 fpcmp_mode
= ix86_fp_compare_mode (code
);
8440 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8443 *second_test
= NULL_RTX
;
8445 *bypass_test
= NULL_RTX
;
8447 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8449 /* Do fcomi/sahf based test when profitable. */
8450 if ((bypass_code
== NIL
|| bypass_test
)
8451 && (second_code
== NIL
|| second_test
)
8452 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8456 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8457 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8463 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8464 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8466 scratch
= gen_reg_rtx (HImode
);
8467 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8468 emit_insn (gen_x86_sahf_1 (scratch
));
8471 /* The FP codes work out to act like unsigned. */
8472 intcmp_mode
= fpcmp_mode
;
8474 if (bypass_code
!= NIL
)
8475 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8476 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8478 if (second_code
!= NIL
)
8479 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8480 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8485 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8486 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8487 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8489 scratch
= gen_reg_rtx (HImode
);
8490 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8492 /* In the unordered case, we have to check C2 for NaN's, which
8493 doesn't happen to work out to anything nice combination-wise.
8494 So do some bit twiddling on the value we've got in AH to come
8495 up with an appropriate set of condition codes. */
8497 intcmp_mode
= CCNOmode
;
8502 if (code
== GT
|| !TARGET_IEEE_FP
)
8504 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8509 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8510 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8511 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8512 intcmp_mode
= CCmode
;
8518 if (code
== LT
&& TARGET_IEEE_FP
)
8520 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8521 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8522 intcmp_mode
= CCmode
;
8527 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8533 if (code
== GE
|| !TARGET_IEEE_FP
)
8535 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8540 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8541 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8548 if (code
== LE
&& TARGET_IEEE_FP
)
8550 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8551 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8552 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8553 intcmp_mode
= CCmode
;
8558 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8564 if (code
== EQ
&& TARGET_IEEE_FP
)
8566 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8567 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8568 intcmp_mode
= CCmode
;
8573 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8580 if (code
== NE
&& TARGET_IEEE_FP
)
8582 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8583 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8589 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8595 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8599 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8608 /* Return the test that should be put into the flags user, i.e.
8609 the bcc, scc, or cmov instruction. */
8610 return gen_rtx_fmt_ee (code
, VOIDmode
,
8611 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8616 ix86_expand_compare (code
, second_test
, bypass_test
)
8618 rtx
*second_test
, *bypass_test
;
8621 op0
= ix86_compare_op0
;
8622 op1
= ix86_compare_op1
;
8625 *second_test
= NULL_RTX
;
8627 *bypass_test
= NULL_RTX
;
8629 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8630 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8631 second_test
, bypass_test
);
8633 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8638 /* Return true if the CODE will result in nontrivial jump sequence. */
8640 ix86_fp_jump_nontrivial_p (code
)
8643 enum rtx_code bypass_code
, first_code
, second_code
;
8646 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8647 return bypass_code
!= NIL
|| second_code
!= NIL
;
8651 ix86_expand_branch (code
, label
)
8657 switch (GET_MODE (ix86_compare_op0
))
8663 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8664 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8665 gen_rtx_LABEL_REF (VOIDmode
, label
),
8667 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8677 enum rtx_code bypass_code
, first_code
, second_code
;
8679 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8682 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8684 /* Check whether we will use the natural sequence with one jump. If
8685 so, we can expand jump early. Otherwise delay expansion by
8686 creating compound insn to not confuse optimizers. */
8687 if (bypass_code
== NIL
&& second_code
== NIL
8690 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8691 gen_rtx_LABEL_REF (VOIDmode
, label
),
8696 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8697 ix86_compare_op0
, ix86_compare_op1
);
8698 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8699 gen_rtx_LABEL_REF (VOIDmode
, label
),
8701 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8703 use_fcomi
= ix86_use_fcomi_compare (code
);
8704 vec
= rtvec_alloc (3 + !use_fcomi
);
8705 RTVEC_ELT (vec
, 0) = tmp
;
8707 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8709 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8712 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8714 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8722 /* Expand DImode branch into multiple compare+branch. */
8724 rtx lo
[2], hi
[2], label2
;
8725 enum rtx_code code1
, code2
, code3
;
8727 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8729 tmp
= ix86_compare_op0
;
8730 ix86_compare_op0
= ix86_compare_op1
;
8731 ix86_compare_op1
= tmp
;
8732 code
= swap_condition (code
);
8734 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8735 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8737 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8738 avoid two branches. This costs one extra insn, so disable when
8739 optimizing for size. */
8741 if ((code
== EQ
|| code
== NE
)
8743 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8748 if (hi
[1] != const0_rtx
)
8749 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8750 NULL_RTX
, 0, OPTAB_WIDEN
);
8753 if (lo
[1] != const0_rtx
)
8754 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8755 NULL_RTX
, 0, OPTAB_WIDEN
);
8757 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8758 NULL_RTX
, 0, OPTAB_WIDEN
);
8760 ix86_compare_op0
= tmp
;
8761 ix86_compare_op1
= const0_rtx
;
8762 ix86_expand_branch (code
, label
);
8766 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8767 op1 is a constant and the low word is zero, then we can just
8768 examine the high word. */
8770 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8773 case LT
: case LTU
: case GE
: case GEU
:
8774 ix86_compare_op0
= hi
[0];
8775 ix86_compare_op1
= hi
[1];
8776 ix86_expand_branch (code
, label
);
8782 /* Otherwise, we need two or three jumps. */
8784 label2
= gen_label_rtx ();
8787 code2
= swap_condition (code
);
8788 code3
= unsigned_condition (code
);
8792 case LT
: case GT
: case LTU
: case GTU
:
8795 case LE
: code1
= LT
; code2
= GT
; break;
8796 case GE
: code1
= GT
; code2
= LT
; break;
8797 case LEU
: code1
= LTU
; code2
= GTU
; break;
8798 case GEU
: code1
= GTU
; code2
= LTU
; break;
8800 case EQ
: code1
= NIL
; code2
= NE
; break;
8801 case NE
: code2
= NIL
; break;
8809 * if (hi(a) < hi(b)) goto true;
8810 * if (hi(a) > hi(b)) goto false;
8811 * if (lo(a) < lo(b)) goto true;
8815 ix86_compare_op0
= hi
[0];
8816 ix86_compare_op1
= hi
[1];
8819 ix86_expand_branch (code1
, label
);
8821 ix86_expand_branch (code2
, label2
);
8823 ix86_compare_op0
= lo
[0];
8824 ix86_compare_op1
= lo
[1];
8825 ix86_expand_branch (code3
, label
);
8828 emit_label (label2
);
8837 /* Split branch based on floating point condition. */
8839 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
8841 rtx op1
, op2
, target1
, target2
, tmp
;
8844 rtx label
= NULL_RTX
;
8846 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8849 if (target2
!= pc_rtx
)
8852 code
= reverse_condition_maybe_unordered (code
);
8857 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8858 tmp
, &second
, &bypass
);
8860 if (split_branch_probability
>= 0)
8862 /* Distribute the probabilities across the jumps.
8863 Assume the BYPASS and SECOND to be always test
8865 probability
= split_branch_probability
;
8867 /* Value of 1 is low enough to make no need for probability
8868 to be updated. Later we may run some experiments and see
8869 if unordered values are more frequent in practice. */
8871 bypass_probability
= 1;
8873 second_probability
= 1;
8875 if (bypass
!= NULL_RTX
)
8877 label
= gen_label_rtx ();
8878 i
= emit_jump_insn (gen_rtx_SET
8880 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8882 gen_rtx_LABEL_REF (VOIDmode
,
8885 if (bypass_probability
>= 0)
8887 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8888 GEN_INT (bypass_probability
),
8891 i
= emit_jump_insn (gen_rtx_SET
8893 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8894 condition
, target1
, target2
)));
8895 if (probability
>= 0)
8897 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8898 GEN_INT (probability
),
8900 if (second
!= NULL_RTX
)
8902 i
= emit_jump_insn (gen_rtx_SET
8904 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8906 if (second_probability
>= 0)
8908 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8909 GEN_INT (second_probability
),
8912 if (label
!= NULL_RTX
)
8917 ix86_expand_setcc (code
, dest
)
8921 rtx ret
, tmp
, tmpreg
;
8922 rtx second_test
, bypass_test
;
8924 if (GET_MODE (ix86_compare_op0
) == DImode
8926 return 0; /* FAIL */
8928 if (GET_MODE (dest
) != QImode
)
8931 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8932 PUT_MODE (ret
, QImode
);
8937 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8938 if (bypass_test
|| second_test
)
8940 rtx test
= second_test
;
8942 rtx tmp2
= gen_reg_rtx (QImode
);
8949 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8951 PUT_MODE (test
, QImode
);
8952 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8955 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8957 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8960 return 1; /* DONE */
8964 ix86_expand_int_movcc (operands
)
8967 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8968 rtx compare_seq
, compare_op
;
8969 rtx second_test
, bypass_test
;
8970 enum machine_mode mode
= GET_MODE (operands
[0]);
8972 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8973 In case comparsion is done with immediate, we can convert it to LTU or
8974 GEU by altering the integer. */
8976 if ((code
== LEU
|| code
== GTU
)
8977 && GET_CODE (ix86_compare_op1
) == CONST_INT
8979 && INTVAL (ix86_compare_op1
) != -1
8980 /* For x86-64, the immediate field in the instruction is 32-bit
8981 signed, so we can't increment a DImode value above 0x7fffffff. */
8983 || GET_MODE (ix86_compare_op0
) != DImode
8984 || INTVAL (ix86_compare_op1
) != 0x7fffffff)
8985 && GET_CODE (operands
[2]) == CONST_INT
8986 && GET_CODE (operands
[3]) == CONST_INT
)
8992 ix86_compare_op1
= gen_int_mode (INTVAL (ix86_compare_op1
) + 1,
8993 GET_MODE (ix86_compare_op0
));
8997 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8998 compare_seq
= get_insns ();
9001 compare_code
= GET_CODE (compare_op
);
9003 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9004 HImode insns, we'd be swallowed in word prefix ops. */
9007 && (mode
!= DImode
|| TARGET_64BIT
)
9008 && GET_CODE (operands
[2]) == CONST_INT
9009 && GET_CODE (operands
[3]) == CONST_INT
)
9011 rtx out
= operands
[0];
9012 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9013 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9016 if ((compare_code
== LTU
|| compare_code
== GEU
)
9017 && !second_test
&& !bypass_test
)
9019 /* Detect overlap between destination and compare sources. */
9022 /* To simplify rest of code, restrict to the GEU case. */
9023 if (compare_code
== LTU
)
9025 HOST_WIDE_INT tmp
= ct
;
9028 compare_code
= reverse_condition (compare_code
);
9029 code
= reverse_condition (code
);
9033 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9034 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9035 tmp
= gen_reg_rtx (mode
);
9037 emit_insn (compare_seq
);
9039 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
9041 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
9053 tmp
= expand_simple_binop (mode
, PLUS
,
9055 tmp
, 1, OPTAB_DIRECT
);
9066 tmp
= expand_simple_binop (mode
, IOR
,
9068 tmp
, 1, OPTAB_DIRECT
);
9070 else if (diff
== -1 && ct
)
9080 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
9082 tmp
= expand_simple_binop (mode
, PLUS
,
9084 tmp
, 1, OPTAB_DIRECT
);
9092 * andl cf - ct, dest
9102 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
9105 tmp
= expand_simple_binop (mode
, AND
,
9107 gen_int_mode (cf
- ct
, mode
),
9108 tmp
, 1, OPTAB_DIRECT
);
9110 tmp
= expand_simple_binop (mode
, PLUS
,
9112 tmp
, 1, OPTAB_DIRECT
);
9116 emit_move_insn (out
, tmp
);
9118 return 1; /* DONE */
9125 tmp
= ct
, ct
= cf
, cf
= tmp
;
9127 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9129 /* We may be reversing unordered compare to normal compare, that
9130 is not valid in general (we may convert non-trapping condition
9131 to trapping one), however on i386 we currently emit all
9132 comparisons unordered. */
9133 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9134 code
= reverse_condition_maybe_unordered (code
);
9138 compare_code
= reverse_condition (compare_code
);
9139 code
= reverse_condition (code
);
9144 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9145 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
9147 if (ix86_compare_op1
== const0_rtx
9148 && (code
== LT
|| code
== GE
))
9149 compare_code
= code
;
9150 else if (ix86_compare_op1
== constm1_rtx
)
9154 else if (code
== GT
)
9159 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9160 if (compare_code
!= NIL
9161 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9162 && (cf
== -1 || ct
== -1))
9164 /* If lea code below could be used, only optimize
9165 if it results in a 2 insn sequence. */
9167 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9168 || diff
== 3 || diff
== 5 || diff
== 9)
9169 || (compare_code
== LT
&& ct
== -1)
9170 || (compare_code
== GE
&& cf
== -1))
9173 * notl op1 (if necessary)
9181 code
= reverse_condition (code
);
9184 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9185 ix86_compare_op1
, VOIDmode
, 0, -1);
9187 out
= expand_simple_binop (mode
, IOR
,
9189 out
, 1, OPTAB_DIRECT
);
9190 if (out
!= operands
[0])
9191 emit_move_insn (operands
[0], out
);
9193 return 1; /* DONE */
9197 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9198 || diff
== 3 || diff
== 5 || diff
== 9)
9199 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
9205 * lea cf(dest*(ct-cf)),dest
9209 * This also catches the degenerate setcc-only case.
9215 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9216 ix86_compare_op1
, VOIDmode
, 0, 1);
9219 /* On x86_64 the lea instruction operates on Pmode, so we need
9220 to get arithmetics done in proper mode to match. */
9227 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9231 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9237 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9241 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
9244 out
= force_operand (tmp
, out
);
9246 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
9248 if (out
!= operands
[0])
9249 emit_move_insn (operands
[0], copy_rtx (out
));
9251 return 1; /* DONE */
9255 * General case: Jumpful:
9256 * xorl dest,dest cmpl op1, op2
9257 * cmpl op1, op2 movl ct, dest
9259 * decl dest movl cf, dest
9260 * andl (cf-ct),dest 1:
9265 * This is reasonably steep, but branch mispredict costs are
9266 * high on modern cpus, so consider failing only if optimizing
9269 * %%% Parameterize branch_cost on the tuning architecture, then
9270 * use that. The 80386 couldn't care less about mispredicts.
9273 if (!optimize_size
&& !TARGET_CMOVE
)
9279 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9280 /* We may be reversing unordered compare to normal compare,
9281 that is not valid in general (we may convert non-trapping
9282 condition to trapping one), however on i386 we currently
9283 emit all comparisons unordered. */
9284 code
= reverse_condition_maybe_unordered (code
);
9287 code
= reverse_condition (code
);
9288 if (compare_code
!= NIL
)
9289 compare_code
= reverse_condition (compare_code
);
9293 if (compare_code
!= NIL
)
9295 /* notl op1 (if needed)
9300 For x < 0 (resp. x <= -1) there will be no notl,
9301 so if possible swap the constants to get rid of the
9303 True/false will be -1/0 while code below (store flag
9304 followed by decrement) is 0/-1, so the constants need
9305 to be exchanged once more. */
9307 if (compare_code
== GE
|| !cf
)
9309 code
= reverse_condition (code
);
9314 HOST_WIDE_INT tmp
= cf
;
9319 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9320 ix86_compare_op1
, VOIDmode
, 0, -1);
9324 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9325 ix86_compare_op1
, VOIDmode
, 0, 1);
9327 out
= expand_simple_binop (mode
, PLUS
, out
, constm1_rtx
,
9328 out
, 1, OPTAB_DIRECT
);
9331 out
= expand_simple_binop (mode
, AND
, out
,
9332 gen_int_mode (cf
- ct
, mode
),
9333 out
, 1, OPTAB_DIRECT
);
9335 out
= expand_simple_binop (mode
, PLUS
, out
, GEN_INT (ct
),
9336 out
, 1, OPTAB_DIRECT
);
9337 if (out
!= operands
[0])
9338 emit_move_insn (operands
[0], out
);
9340 return 1; /* DONE */
9346 /* Try a few things more with specific constants and a variable. */
9349 rtx var
, orig_out
, out
, tmp
;
9352 return 0; /* FAIL */
9354 /* If one of the two operands is an interesting constant, load a
9355 constant with the above and mask it in with a logical operation. */
9357 if (GET_CODE (operands
[2]) == CONST_INT
)
9360 if (INTVAL (operands
[2]) == 0)
9361 operands
[3] = constm1_rtx
, op
= and_optab
;
9362 else if (INTVAL (operands
[2]) == -1)
9363 operands
[3] = const0_rtx
, op
= ior_optab
;
9365 return 0; /* FAIL */
9367 else if (GET_CODE (operands
[3]) == CONST_INT
)
9370 if (INTVAL (operands
[3]) == 0)
9371 operands
[2] = constm1_rtx
, op
= and_optab
;
9372 else if (INTVAL (operands
[3]) == -1)
9373 operands
[2] = const0_rtx
, op
= ior_optab
;
9375 return 0; /* FAIL */
9378 return 0; /* FAIL */
9380 orig_out
= operands
[0];
9381 tmp
= gen_reg_rtx (mode
);
9384 /* Recurse to get the constant loaded. */
9385 if (ix86_expand_int_movcc (operands
) == 0)
9386 return 0; /* FAIL */
9388 /* Mask in the interesting variable. */
9389 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9391 if (out
!= orig_out
)
9392 emit_move_insn (orig_out
, out
);
9394 return 1; /* DONE */
9398 * For comparison with above,
9408 if (! nonimmediate_operand (operands
[2], mode
))
9409 operands
[2] = force_reg (mode
, operands
[2]);
9410 if (! nonimmediate_operand (operands
[3], mode
))
9411 operands
[3] = force_reg (mode
, operands
[3]);
9413 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9415 rtx tmp
= gen_reg_rtx (mode
);
9416 emit_move_insn (tmp
, operands
[3]);
9419 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9421 rtx tmp
= gen_reg_rtx (mode
);
9422 emit_move_insn (tmp
, operands
[2]);
9425 if (! register_operand (operands
[2], VOIDmode
)
9426 && ! register_operand (operands
[3], VOIDmode
))
9427 operands
[2] = force_reg (mode
, operands
[2]);
9429 emit_insn (compare_seq
);
9430 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9431 gen_rtx_IF_THEN_ELSE (mode
,
9432 compare_op
, operands
[2],
9435 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9436 gen_rtx_IF_THEN_ELSE (mode
,
9441 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9442 gen_rtx_IF_THEN_ELSE (mode
,
9447 return 1; /* DONE */
9451 ix86_expand_fp_movcc (operands
)
9456 rtx compare_op
, second_test
, bypass_test
;
9458 /* For SF/DFmode conditional moves based on comparisons
9459 in same mode, we may want to use SSE min/max instructions. */
9460 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9461 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9462 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9463 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9465 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9466 /* We may be called from the post-reload splitter. */
9467 && (!REG_P (operands
[0])
9468 || SSE_REG_P (operands
[0])
9469 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9471 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9472 code
= GET_CODE (operands
[1]);
9474 /* See if we have (cross) match between comparison operands and
9475 conditional move operands. */
9476 if (rtx_equal_p (operands
[2], op1
))
9481 code
= reverse_condition_maybe_unordered (code
);
9483 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9485 /* Check for min operation. */
9488 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9489 if (memory_operand (op0
, VOIDmode
))
9490 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9491 if (GET_MODE (operands
[0]) == SFmode
)
9492 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9494 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9497 /* Check for max operation. */
9500 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9501 if (memory_operand (op0
, VOIDmode
))
9502 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9503 if (GET_MODE (operands
[0]) == SFmode
)
9504 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9506 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9510 /* Manage condition to be sse_comparison_operator. In case we are
9511 in non-ieee mode, try to canonicalize the destination operand
9512 to be first in the comparison - this helps reload to avoid extra
9514 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9515 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9517 rtx tmp
= ix86_compare_op0
;
9518 ix86_compare_op0
= ix86_compare_op1
;
9519 ix86_compare_op1
= tmp
;
9520 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9521 VOIDmode
, ix86_compare_op0
,
9524 /* Similary try to manage result to be first operand of conditional
9525 move. We also don't support the NE comparison on SSE, so try to
9527 if ((rtx_equal_p (operands
[0], operands
[3])
9528 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9529 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9531 rtx tmp
= operands
[2];
9532 operands
[2] = operands
[3];
9534 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9535 (GET_CODE (operands
[1])),
9536 VOIDmode
, ix86_compare_op0
,
9539 if (GET_MODE (operands
[0]) == SFmode
)
9540 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9541 operands
[2], operands
[3],
9542 ix86_compare_op0
, ix86_compare_op1
));
9544 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9545 operands
[2], operands
[3],
9546 ix86_compare_op0
, ix86_compare_op1
));
9550 /* The floating point conditional move instructions don't directly
9551 support conditions resulting from a signed integer comparison. */
9553 code
= GET_CODE (operands
[1]);
9554 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9556 /* The floating point conditional move instructions don't directly
9557 support signed integer comparisons. */
9559 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9561 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9563 tmp
= gen_reg_rtx (QImode
);
9564 ix86_expand_setcc (code
, tmp
);
9566 ix86_compare_op0
= tmp
;
9567 ix86_compare_op1
= const0_rtx
;
9568 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9570 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9572 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9573 emit_move_insn (tmp
, operands
[3]);
9576 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9578 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9579 emit_move_insn (tmp
, operands
[2]);
9583 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9584 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9589 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9590 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9595 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9596 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9604 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9605 works for floating pointer parameters and nonoffsetable memories.
9606 For pushes, it returns just stack offsets; the values will be saved
9607 in the right order. Maximally three parts are generated. */
9610 ix86_split_to_parts (operand
, parts
, mode
)
9613 enum machine_mode mode
;
9618 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
9620 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9622 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9624 if (size
< 2 || size
> 3)
9627 /* Optimize constant pool reference to immediates. This is used by fp
9628 moves, that force all constants to memory to allow combining. */
9629 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
9631 rtx tmp
= maybe_get_pool_constant (operand
);
9636 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9638 /* The only non-offsetable memories we handle are pushes. */
9639 if (! push_operand (operand
, VOIDmode
))
9642 operand
= copy_rtx (operand
);
9643 PUT_MODE (operand
, Pmode
);
9644 parts
[0] = parts
[1] = parts
[2] = operand
;
9646 else if (!TARGET_64BIT
)
9649 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9652 if (REG_P (operand
))
9654 if (!reload_completed
)
9656 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9657 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9659 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9661 else if (offsettable_memref_p (operand
))
9663 operand
= adjust_address (operand
, SImode
, 0);
9665 parts
[1] = adjust_address (operand
, SImode
, 4);
9667 parts
[2] = adjust_address (operand
, SImode
, 8);
9669 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9674 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9679 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9680 parts
[2] = gen_int_mode (l
[2], SImode
);
9683 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9688 parts
[1] = gen_int_mode (l
[1], SImode
);
9689 parts
[0] = gen_int_mode (l
[0], SImode
);
9698 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9699 if (mode
== XFmode
|| mode
== TFmode
)
9701 if (REG_P (operand
))
9703 if (!reload_completed
)
9705 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9706 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9708 else if (offsettable_memref_p (operand
))
9710 operand
= adjust_address (operand
, DImode
, 0);
9712 parts
[1] = adjust_address (operand
, SImode
, 8);
9714 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9719 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9720 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9721 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9722 if (HOST_BITS_PER_WIDE_INT
>= 64)
9725 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9726 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
9729 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
9730 parts
[1] = gen_int_mode (l
[2], SImode
);
9740 /* Emit insns to perform a move or push of DI, DF, and XF values.
9741 Return false when normal moves are needed; true when all required
9742 insns have been emitted. Operands 2-4 contain the input values
9743 int the correct order; operands 5-7 contain the output values. */
9746 ix86_split_long_move (operands
)
9753 enum machine_mode mode
= GET_MODE (operands
[0]);
9755 /* The DFmode expanders may ask us to move double.
9756 For 64bit target this is single move. By hiding the fact
9757 here we simplify i386.md splitters. */
9758 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
9760 /* Optimize constant pool reference to immediates. This is used by
9761 fp moves, that force all constants to memory to allow combining. */
9763 if (GET_CODE (operands
[1]) == MEM
9764 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
9765 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
9766 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
9767 if (push_operand (operands
[0], VOIDmode
))
9769 operands
[0] = copy_rtx (operands
[0]);
9770 PUT_MODE (operands
[0], Pmode
);
9773 operands
[0] = gen_lowpart (DImode
, operands
[0]);
9774 operands
[1] = gen_lowpart (DImode
, operands
[1]);
9775 emit_move_insn (operands
[0], operands
[1]);
9779 /* The only non-offsettable memory we handle is push. */
9780 if (push_operand (operands
[0], VOIDmode
))
9782 else if (GET_CODE (operands
[0]) == MEM
9783 && ! offsettable_memref_p (operands
[0]))
9786 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
9787 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
9789 /* When emitting push, take care for source operands on the stack. */
9790 if (push
&& GET_CODE (operands
[1]) == MEM
9791 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
9794 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
9795 XEXP (part
[1][2], 0));
9796 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
9797 XEXP (part
[1][1], 0));
9800 /* We need to do copy in the right order in case an address register
9801 of the source overlaps the destination. */
9802 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
9804 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
9806 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9809 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
9812 /* Collision in the middle part can be handled by reordering. */
9813 if (collisions
== 1 && nparts
== 3
9814 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9817 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
9818 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
9821 /* If there are more collisions, we can't handle it by reordering.
9822 Do an lea to the last part and use only one colliding move. */
9823 else if (collisions
> 1)
9826 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
9827 XEXP (part
[1][0], 0)));
9828 part
[1][0] = change_address (part
[1][0],
9829 TARGET_64BIT
? DImode
: SImode
,
9830 part
[0][nparts
- 1]);
9831 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
9833 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
9843 /* We use only first 12 bytes of TFmode value, but for pushing we
9844 are required to adjust stack as if we were pushing real 16byte
9846 if (mode
== TFmode
&& !TARGET_64BIT
)
9847 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
9849 emit_move_insn (part
[0][2], part
[1][2]);
9854 /* In 64bit mode we don't have 32bit push available. In case this is
9855 register, it is OK - we will just use larger counterpart. We also
9856 retype memory - these comes from attempt to avoid REX prefix on
9857 moving of second half of TFmode value. */
9858 if (GET_MODE (part
[1][1]) == SImode
)
9860 if (GET_CODE (part
[1][1]) == MEM
)
9861 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
9862 else if (REG_P (part
[1][1]))
9863 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
9866 if (GET_MODE (part
[1][0]) == SImode
)
9867 part
[1][0] = part
[1][1];
9870 emit_move_insn (part
[0][1], part
[1][1]);
9871 emit_move_insn (part
[0][0], part
[1][0]);
9875 /* Choose correct order to not overwrite the source before it is copied. */
9876 if ((REG_P (part
[0][0])
9877 && REG_P (part
[1][1])
9878 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9880 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9882 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9886 operands
[2] = part
[0][2];
9887 operands
[3] = part
[0][1];
9888 operands
[4] = part
[0][0];
9889 operands
[5] = part
[1][2];
9890 operands
[6] = part
[1][1];
9891 operands
[7] = part
[1][0];
9895 operands
[2] = part
[0][1];
9896 operands
[3] = part
[0][0];
9897 operands
[5] = part
[1][1];
9898 operands
[6] = part
[1][0];
9905 operands
[2] = part
[0][0];
9906 operands
[3] = part
[0][1];
9907 operands
[4] = part
[0][2];
9908 operands
[5] = part
[1][0];
9909 operands
[6] = part
[1][1];
9910 operands
[7] = part
[1][2];
9914 operands
[2] = part
[0][0];
9915 operands
[3] = part
[0][1];
9916 operands
[5] = part
[1][0];
9917 operands
[6] = part
[1][1];
9920 emit_move_insn (operands
[2], operands
[5]);
9921 emit_move_insn (operands
[3], operands
[6]);
9923 emit_move_insn (operands
[4], operands
[7]);
9929 ix86_split_ashldi (operands
, scratch
)
9930 rtx
*operands
, scratch
;
9932 rtx low
[2], high
[2];
9935 if (GET_CODE (operands
[2]) == CONST_INT
)
9937 split_di (operands
, 2, low
, high
);
9938 count
= INTVAL (operands
[2]) & 63;
9942 emit_move_insn (high
[0], low
[1]);
9943 emit_move_insn (low
[0], const0_rtx
);
9946 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
9950 if (!rtx_equal_p (operands
[0], operands
[1]))
9951 emit_move_insn (operands
[0], operands
[1]);
9952 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
9953 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
9958 if (!rtx_equal_p (operands
[0], operands
[1]))
9959 emit_move_insn (operands
[0], operands
[1]);
9961 split_di (operands
, 1, low
, high
);
9963 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
9964 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
9966 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9968 if (! no_new_pseudos
)
9969 scratch
= force_reg (SImode
, const0_rtx
);
9971 emit_move_insn (scratch
, const0_rtx
);
9973 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
9977 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
9982 ix86_split_ashrdi (operands
, scratch
)
9983 rtx
*operands
, scratch
;
9985 rtx low
[2], high
[2];
9988 if (GET_CODE (operands
[2]) == CONST_INT
)
9990 split_di (operands
, 2, low
, high
);
9991 count
= INTVAL (operands
[2]) & 63;
9995 emit_move_insn (low
[0], high
[1]);
9997 if (! reload_completed
)
9998 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10001 emit_move_insn (high
[0], low
[0]);
10002 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10006 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10010 if (!rtx_equal_p (operands
[0], operands
[1]))
10011 emit_move_insn (operands
[0], operands
[1]);
10012 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10013 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10018 if (!rtx_equal_p (operands
[0], operands
[1]))
10019 emit_move_insn (operands
[0], operands
[1]);
10021 split_di (operands
, 1, low
, high
);
10023 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10024 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10026 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10028 if (! no_new_pseudos
)
10029 scratch
= gen_reg_rtx (SImode
);
10030 emit_move_insn (scratch
, high
[0]);
10031 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10032 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10036 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10041 ix86_split_lshrdi (operands
, scratch
)
10042 rtx
*operands
, scratch
;
10044 rtx low
[2], high
[2];
10047 if (GET_CODE (operands
[2]) == CONST_INT
)
10049 split_di (operands
, 2, low
, high
);
10050 count
= INTVAL (operands
[2]) & 63;
10054 emit_move_insn (low
[0], high
[1]);
10055 emit_move_insn (high
[0], const0_rtx
);
10058 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10062 if (!rtx_equal_p (operands
[0], operands
[1]))
10063 emit_move_insn (operands
[0], operands
[1]);
10064 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10065 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10070 if (!rtx_equal_p (operands
[0], operands
[1]))
10071 emit_move_insn (operands
[0], operands
[1]);
10073 split_di (operands
, 1, low
, high
);
10075 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10076 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10078 /* Heh. By reversing the arguments, we can reuse this pattern. */
10079 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10081 if (! no_new_pseudos
)
10082 scratch
= force_reg (SImode
, const0_rtx
);
10084 emit_move_insn (scratch
, const0_rtx
);
10086 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10090 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10094 /* Helper function for the string operations below. Dest VARIABLE whether
10095 it is aligned to VALUE bytes. If true, jump to the label. */
10097 ix86_expand_aligntest (variable
, value
)
10101 rtx label
= gen_label_rtx ();
10102 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10103 if (GET_MODE (variable
) == DImode
)
10104 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10106 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10107 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10112 /* Adjust COUNTER by the VALUE. */
10114 ix86_adjust_counter (countreg
, value
)
10116 HOST_WIDE_INT value
;
10118 if (GET_MODE (countreg
) == DImode
)
10119 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10121 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10124 /* Zero extend possibly SImode EXP to Pmode register. */
10126 ix86_zero_extend_to_Pmode (exp
)
10130 if (GET_MODE (exp
) == VOIDmode
)
10131 return force_reg (Pmode
, exp
);
10132 if (GET_MODE (exp
) == Pmode
)
10133 return copy_to_mode_reg (Pmode
, exp
);
10134 r
= gen_reg_rtx (Pmode
);
10135 emit_insn (gen_zero_extendsidi2 (r
, exp
));
10139 /* Expand string move (memcpy) operation. Use i386 string operations when
10140 profitable. expand_clrstr contains similar code. */
10142 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
10143 rtx dst
, src
, count_exp
, align_exp
;
10145 rtx srcreg
, destreg
, countreg
;
10146 enum machine_mode counter_mode
;
10147 HOST_WIDE_INT align
= 0;
10148 unsigned HOST_WIDE_INT count
= 0;
10153 if (GET_CODE (align_exp
) == CONST_INT
)
10154 align
= INTVAL (align_exp
);
10156 /* This simple hack avoids all inlining code and simplifies code below. */
10157 if (!TARGET_ALIGN_STRINGOPS
)
10160 if (GET_CODE (count_exp
) == CONST_INT
)
10161 count
= INTVAL (count_exp
);
10163 /* Figure out proper mode for counter. For 32bits it is always SImode,
10164 for 64bits use SImode when possible, otherwise DImode.
10165 Set count to number of bytes copied when known at compile time. */
10166 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10167 || x86_64_zero_extended_value (count_exp
))
10168 counter_mode
= SImode
;
10170 counter_mode
= DImode
;
10172 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10175 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10176 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10178 emit_insn (gen_cld ());
10180 /* When optimizing for size emit simple rep ; movsb instruction for
10181 counts not divisible by 4. */
10183 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10185 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10187 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
10188 destreg
, srcreg
, countreg
));
10190 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
10191 destreg
, srcreg
, countreg
));
10194 /* For constant aligned (or small unaligned) copies use rep movsl
10195 followed by code copying the rest. For PentiumPro ensure 8 byte
10196 alignment to allow rep movsl acceleration. */
10198 else if (count
!= 0
10200 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10201 || optimize_size
|| count
< (unsigned int) 64))
10203 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10204 if (count
& ~(size
- 1))
10206 countreg
= copy_to_mode_reg (counter_mode
,
10207 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10208 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10209 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10213 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
10214 destreg
, srcreg
, countreg
));
10216 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
10217 destreg
, srcreg
, countreg
));
10220 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
10221 destreg
, srcreg
, countreg
));
10223 if (size
== 8 && (count
& 0x04))
10224 emit_insn (gen_strmovsi (destreg
, srcreg
));
10226 emit_insn (gen_strmovhi (destreg
, srcreg
));
10228 emit_insn (gen_strmovqi (destreg
, srcreg
));
10230 /* The generic code based on the glibc implementation:
10231 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10232 allowing accelerated copying there)
10233 - copy the data using rep movsl
10234 - copy the rest. */
10239 int desired_alignment
= (TARGET_PENTIUMPRO
10240 && (count
== 0 || count
>= (unsigned int) 260)
10241 ? 8 : UNITS_PER_WORD
);
10243 /* In case we don't know anything about the alignment, default to
10244 library version, since it is usually equally fast and result in
10246 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10252 if (TARGET_SINGLE_STRINGOP
)
10253 emit_insn (gen_cld ());
10255 countreg2
= gen_reg_rtx (Pmode
);
10256 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10258 /* We don't use loops to align destination and to copy parts smaller
10259 than 4 bytes, because gcc is able to optimize such code better (in
10260 the case the destination or the count really is aligned, gcc is often
10261 able to predict the branches) and also it is friendlier to the
10262 hardware branch prediction.
10264 Using loops is benefical for generic case, because we can
10265 handle small counts using the loops. Many CPUs (such as Athlon)
10266 have large REP prefix setup costs.
10268 This is quite costy. Maybe we can revisit this decision later or
10269 add some customizability to this code. */
10271 if (count
== 0 && align
< desired_alignment
)
10273 label
= gen_label_rtx ();
10274 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10275 LEU
, 0, counter_mode
, 1, label
);
10279 rtx label
= ix86_expand_aligntest (destreg
, 1);
10280 emit_insn (gen_strmovqi (destreg
, srcreg
));
10281 ix86_adjust_counter (countreg
, 1);
10282 emit_label (label
);
10283 LABEL_NUSES (label
) = 1;
10287 rtx label
= ix86_expand_aligntest (destreg
, 2);
10288 emit_insn (gen_strmovhi (destreg
, srcreg
));
10289 ix86_adjust_counter (countreg
, 2);
10290 emit_label (label
);
10291 LABEL_NUSES (label
) = 1;
10293 if (align
<= 4 && desired_alignment
> 4)
10295 rtx label
= ix86_expand_aligntest (destreg
, 4);
10296 emit_insn (gen_strmovsi (destreg
, srcreg
));
10297 ix86_adjust_counter (countreg
, 4);
10298 emit_label (label
);
10299 LABEL_NUSES (label
) = 1;
10302 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10304 emit_label (label
);
10305 LABEL_NUSES (label
) = 1;
10308 if (!TARGET_SINGLE_STRINGOP
)
10309 emit_insn (gen_cld ());
10312 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10314 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
10315 destreg
, srcreg
, countreg2
));
10319 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10320 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
10321 destreg
, srcreg
, countreg2
));
10326 emit_label (label
);
10327 LABEL_NUSES (label
) = 1;
10329 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10330 emit_insn (gen_strmovsi (destreg
, srcreg
));
10331 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10333 rtx label
= ix86_expand_aligntest (countreg
, 4);
10334 emit_insn (gen_strmovsi (destreg
, srcreg
));
10335 emit_label (label
);
10336 LABEL_NUSES (label
) = 1;
10338 if (align
> 2 && count
!= 0 && (count
& 2))
10339 emit_insn (gen_strmovhi (destreg
, srcreg
));
10340 if (align
<= 2 || count
== 0)
10342 rtx label
= ix86_expand_aligntest (countreg
, 2);
10343 emit_insn (gen_strmovhi (destreg
, srcreg
));
10344 emit_label (label
);
10345 LABEL_NUSES (label
) = 1;
10347 if (align
> 1 && count
!= 0 && (count
& 1))
10348 emit_insn (gen_strmovqi (destreg
, srcreg
));
10349 if (align
<= 1 || count
== 0)
10351 rtx label
= ix86_expand_aligntest (countreg
, 1);
10352 emit_insn (gen_strmovqi (destreg
, srcreg
));
10353 emit_label (label
);
10354 LABEL_NUSES (label
) = 1;
10358 insns
= get_insns ();
10361 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
10366 /* Expand string clear operation (bzero). Use i386 string operations when
10367 profitable. expand_movstr contains similar code. */
10369 ix86_expand_clrstr (src
, count_exp
, align_exp
)
10370 rtx src
, count_exp
, align_exp
;
10372 rtx destreg
, zeroreg
, countreg
;
10373 enum machine_mode counter_mode
;
10374 HOST_WIDE_INT align
= 0;
10375 unsigned HOST_WIDE_INT count
= 0;
10377 if (GET_CODE (align_exp
) == CONST_INT
)
10378 align
= INTVAL (align_exp
);
10380 /* This simple hack avoids all inlining code and simplifies code below. */
10381 if (!TARGET_ALIGN_STRINGOPS
)
10384 if (GET_CODE (count_exp
) == CONST_INT
)
10385 count
= INTVAL (count_exp
);
10386 /* Figure out proper mode for counter. For 32bits it is always SImode,
10387 for 64bits use SImode when possible, otherwise DImode.
10388 Set count to number of bytes copied when known at compile time. */
10389 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10390 || x86_64_zero_extended_value (count_exp
))
10391 counter_mode
= SImode
;
10393 counter_mode
= DImode
;
10395 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10397 emit_insn (gen_cld ());
10399 /* When optimizing for size emit simple rep ; movsb instruction for
10400 counts not divisible by 4. */
10402 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10404 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10405 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10407 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
10408 destreg
, countreg
));
10410 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
10411 destreg
, countreg
));
10413 else if (count
!= 0
10415 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10416 || optimize_size
|| count
< (unsigned int) 64))
10418 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10419 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10420 if (count
& ~(size
- 1))
10422 countreg
= copy_to_mode_reg (counter_mode
,
10423 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10424 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10425 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10429 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
10430 destreg
, countreg
));
10432 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
10433 destreg
, countreg
));
10436 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
10437 destreg
, countreg
));
10439 if (size
== 8 && (count
& 0x04))
10440 emit_insn (gen_strsetsi (destreg
,
10441 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10443 emit_insn (gen_strsethi (destreg
,
10444 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10446 emit_insn (gen_strsetqi (destreg
,
10447 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10453 /* Compute desired alignment of the string operation. */
10454 int desired_alignment
= (TARGET_PENTIUMPRO
10455 && (count
== 0 || count
>= (unsigned int) 260)
10456 ? 8 : UNITS_PER_WORD
);
10458 /* In case we don't know anything about the alignment, default to
10459 library version, since it is usually equally fast and result in
10461 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10464 if (TARGET_SINGLE_STRINGOP
)
10465 emit_insn (gen_cld ());
10467 countreg2
= gen_reg_rtx (Pmode
);
10468 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10469 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10471 if (count
== 0 && align
< desired_alignment
)
10473 label
= gen_label_rtx ();
10474 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10475 LEU
, 0, counter_mode
, 1, label
);
10479 rtx label
= ix86_expand_aligntest (destreg
, 1);
10480 emit_insn (gen_strsetqi (destreg
,
10481 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10482 ix86_adjust_counter (countreg
, 1);
10483 emit_label (label
);
10484 LABEL_NUSES (label
) = 1;
10488 rtx label
= ix86_expand_aligntest (destreg
, 2);
10489 emit_insn (gen_strsethi (destreg
,
10490 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10491 ix86_adjust_counter (countreg
, 2);
10492 emit_label (label
);
10493 LABEL_NUSES (label
) = 1;
10495 if (align
<= 4 && desired_alignment
> 4)
10497 rtx label
= ix86_expand_aligntest (destreg
, 4);
10498 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
10499 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10501 ix86_adjust_counter (countreg
, 4);
10502 emit_label (label
);
10503 LABEL_NUSES (label
) = 1;
10506 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10508 emit_label (label
);
10509 LABEL_NUSES (label
) = 1;
10513 if (!TARGET_SINGLE_STRINGOP
)
10514 emit_insn (gen_cld ());
10517 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10519 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
10520 destreg
, countreg2
));
10524 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10525 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
10526 destreg
, countreg2
));
10530 emit_label (label
);
10531 LABEL_NUSES (label
) = 1;
10534 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10535 emit_insn (gen_strsetsi (destreg
,
10536 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10537 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10539 rtx label
= ix86_expand_aligntest (countreg
, 4);
10540 emit_insn (gen_strsetsi (destreg
,
10541 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10542 emit_label (label
);
10543 LABEL_NUSES (label
) = 1;
10545 if (align
> 2 && count
!= 0 && (count
& 2))
10546 emit_insn (gen_strsethi (destreg
,
10547 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10548 if (align
<= 2 || count
== 0)
10550 rtx label
= ix86_expand_aligntest (countreg
, 2);
10551 emit_insn (gen_strsethi (destreg
,
10552 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10553 emit_label (label
);
10554 LABEL_NUSES (label
) = 1;
10556 if (align
> 1 && count
!= 0 && (count
& 1))
10557 emit_insn (gen_strsetqi (destreg
,
10558 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10559 if (align
<= 1 || count
== 0)
10561 rtx label
= ix86_expand_aligntest (countreg
, 1);
10562 emit_insn (gen_strsetqi (destreg
,
10563 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10564 emit_label (label
);
10565 LABEL_NUSES (label
) = 1;
10570 /* Expand strlen. */
10572 ix86_expand_strlen (out
, src
, eoschar
, align
)
10573 rtx out
, src
, eoschar
, align
;
10575 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10577 /* The generic case of strlen expander is long. Avoid it's
10578 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10580 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10581 && !TARGET_INLINE_ALL_STRINGOPS
10583 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
10586 addr
= force_reg (Pmode
, XEXP (src
, 0));
10587 scratch1
= gen_reg_rtx (Pmode
);
10589 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10592 /* Well it seems that some optimizer does not combine a call like
10593 foo(strlen(bar), strlen(bar));
10594 when the move and the subtraction is done here. It does calculate
10595 the length just once when these instructions are done inside of
10596 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10597 often used and I use one fewer register for the lifetime of
10598 output_strlen_unroll() this is better. */
10600 emit_move_insn (out
, addr
);
10602 ix86_expand_strlensi_unroll_1 (out
, align
);
10604 /* strlensi_unroll_1 returns the address of the zero at the end of
10605 the string, like memchr(), so compute the length by subtracting
10606 the start address. */
10608 emit_insn (gen_subdi3 (out
, out
, addr
));
10610 emit_insn (gen_subsi3 (out
, out
, addr
));
10614 scratch2
= gen_reg_rtx (Pmode
);
10615 scratch3
= gen_reg_rtx (Pmode
);
10616 scratch4
= force_reg (Pmode
, constm1_rtx
);
10618 emit_move_insn (scratch3
, addr
);
10619 eoschar
= force_reg (QImode
, eoschar
);
10621 emit_insn (gen_cld ());
10624 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
10625 align
, scratch4
, scratch3
));
10626 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10627 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10631 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
10632 align
, scratch4
, scratch3
));
10633 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10634 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10640 /* Expand the appropriate insns for doing strlen if not just doing
10643 out = result, initialized with the start address
10644 align_rtx = alignment of the address.
10645 scratch = scratch register, initialized with the startaddress when
10646 not aligned, otherwise undefined
10648 This is just the body. It needs the initialisations mentioned above and
10649 some address computing at the end. These things are done in i386.md. */
10652 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
10653 rtx out
, align_rtx
;
10657 rtx align_2_label
= NULL_RTX
;
10658 rtx align_3_label
= NULL_RTX
;
10659 rtx align_4_label
= gen_label_rtx ();
10660 rtx end_0_label
= gen_label_rtx ();
10662 rtx tmpreg
= gen_reg_rtx (SImode
);
10663 rtx scratch
= gen_reg_rtx (SImode
);
10666 if (GET_CODE (align_rtx
) == CONST_INT
)
10667 align
= INTVAL (align_rtx
);
10669 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10671 /* Is there a known alignment and is it less than 4? */
10674 rtx scratch1
= gen_reg_rtx (Pmode
);
10675 emit_move_insn (scratch1
, out
);
10676 /* Is there a known alignment and is it not 2? */
10679 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10680 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10682 /* Leave just the 3 lower bits. */
10683 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10684 NULL_RTX
, 0, OPTAB_WIDEN
);
10686 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10687 Pmode
, 1, align_4_label
);
10688 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
10689 Pmode
, 1, align_2_label
);
10690 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
10691 Pmode
, 1, align_3_label
);
10695 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10696 check if is aligned to 4 - byte. */
10698 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
10699 NULL_RTX
, 0, OPTAB_WIDEN
);
10701 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10702 Pmode
, 1, align_4_label
);
10705 mem
= gen_rtx_MEM (QImode
, out
);
10707 /* Now compare the bytes. */
10709 /* Compare the first n unaligned byte on a byte per byte basis. */
10710 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
10711 QImode
, 1, end_0_label
);
10713 /* Increment the address. */
10715 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10717 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10719 /* Not needed with an alignment of 2 */
10722 emit_label (align_2_label
);
10724 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10728 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10730 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10732 emit_label (align_3_label
);
10735 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10739 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10741 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10744 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10745 align this loop. It gives only huge programs, but does not help to
10747 emit_label (align_4_label
);
10749 mem
= gen_rtx_MEM (SImode
, out
);
10750 emit_move_insn (scratch
, mem
);
10752 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
10754 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
10756 /* This formula yields a nonzero result iff one of the bytes is zero.
10757 This saves three branches inside loop and many cycles. */
10759 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
10760 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
10761 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
10762 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
10763 gen_int_mode (0x80808080, SImode
)));
10764 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
10769 rtx reg
= gen_reg_rtx (SImode
);
10770 rtx reg2
= gen_reg_rtx (Pmode
);
10771 emit_move_insn (reg
, tmpreg
);
10772 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
10774 /* If zero is not in the first two bytes, move two bytes forward. */
10775 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10776 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10777 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10778 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
10779 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
10782 /* Emit lea manually to avoid clobbering of flags. */
10783 emit_insn (gen_rtx_SET (SImode
, reg2
,
10784 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
10786 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10787 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10788 emit_insn (gen_rtx_SET (VOIDmode
, out
,
10789 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
10796 rtx end_2_label
= gen_label_rtx ();
10797 /* Is zero in the first two bytes? */
10799 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10800 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10801 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
10802 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10803 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
10805 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10806 JUMP_LABEL (tmp
) = end_2_label
;
10808 /* Not in the first two. Move two bytes forward. */
10809 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
10811 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
10813 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
10815 emit_label (end_2_label
);
10819 /* Avoid branch in fixing the byte. */
10820 tmpreg
= gen_lowpart (QImode
, tmpreg
);
10821 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
10823 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
10825 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
10827 emit_label (end_0_label
);
10831 ix86_expand_call (retval
, fnaddr
, callarg1
, callarg2
, pop
)
10832 rtx retval
, fnaddr
, callarg1
, callarg2
, pop
;
10834 rtx use
= NULL
, call
;
10836 if (pop
== const0_rtx
)
10838 if (TARGET_64BIT
&& pop
)
10842 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
10843 fnaddr
= machopic_indirect_call_target (fnaddr
);
10845 /* Static functions and indirect calls don't need the pic register. */
10846 if (! TARGET_64BIT
&& flag_pic
10847 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
10848 && ! SYMBOL_REF_FLAG (XEXP (fnaddr
, 0)))
10849 use_reg (&use
, pic_offset_table_rtx
);
10851 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
10853 rtx al
= gen_rtx_REG (QImode
, 0);
10854 emit_move_insn (al
, callarg2
);
10855 use_reg (&use
, al
);
10857 #endif /* TARGET_MACHO */
10859 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
10861 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10862 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
10865 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
10867 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
10870 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
10871 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
10872 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
10875 call
= emit_call_insn (call
);
10877 CALL_INSN_FUNCTION_USAGE (call
) = use
;
10881 /* Clear stack slot assignments remembered from previous functions.
10882 This is called from INIT_EXPANDERS once before RTL is emitted for each
10885 static struct machine_function
*
10886 ix86_init_machine_status ()
10888 return ggc_alloc_cleared (sizeof (struct machine_function
));
10891 /* Return a MEM corresponding to a stack slot with mode MODE.
10892 Allocate a new slot if necessary.
10894 The RTL for a function can have several slots available: N is
10895 which slot to use. */
10898 assign_386_stack_local (mode
, n
)
10899 enum machine_mode mode
;
10902 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
10905 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
10906 ix86_stack_locals
[(int) mode
][n
]
10907 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
10909 return ix86_stack_locals
[(int) mode
][n
];
10912 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10914 static GTY(()) rtx ix86_tls_symbol
;
10916 ix86_tls_get_addr ()
10919 if (!ix86_tls_symbol
)
10921 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
10922 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
10923 ? "___tls_get_addr"
10924 : "__tls_get_addr");
10927 return ix86_tls_symbol
;
10930 /* Calculate the length of the memory address in the instruction
10931 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10934 memory_address_length (addr
)
10937 struct ix86_address parts
;
10938 rtx base
, index
, disp
;
10941 if (GET_CODE (addr
) == PRE_DEC
10942 || GET_CODE (addr
) == POST_INC
10943 || GET_CODE (addr
) == PRE_MODIFY
10944 || GET_CODE (addr
) == POST_MODIFY
)
10947 if (! ix86_decompose_address (addr
, &parts
))
10951 index
= parts
.index
;
10955 /* Register Indirect. */
10956 if (base
&& !index
&& !disp
)
10958 /* Special cases: ebp and esp need the two-byte modrm form. */
10959 if (addr
== stack_pointer_rtx
10960 || addr
== arg_pointer_rtx
10961 || addr
== frame_pointer_rtx
10962 || addr
== hard_frame_pointer_rtx
)
10966 /* Direct Addressing. */
10967 else if (disp
&& !base
&& !index
)
10972 /* Find the length of the displacement constant. */
10975 if (GET_CODE (disp
) == CONST_INT
10976 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
10982 /* An index requires the two-byte modrm form. */
10990 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10991 is set, expect that insn have 8bit immediate alternative. */
10993 ix86_attr_length_immediate_default (insn
, shortform
)
10999 extract_insn_cached (insn
);
11000 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11001 if (CONSTANT_P (recog_data
.operand
[i
]))
11006 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
11007 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
11011 switch (get_attr_mode (insn
))
11022 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11027 fatal_insn ("unknown insn mode", insn
);
11033 /* Compute default value for "length_address" attribute. */
11035 ix86_attr_length_address_default (insn
)
11039 extract_insn_cached (insn
);
11040 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11041 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11043 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
11049 /* Return the maximum number of instructions a cpu can issue. */
11056 case PROCESSOR_PENTIUM
:
11060 case PROCESSOR_PENTIUMPRO
:
11061 case PROCESSOR_PENTIUM4
:
11062 case PROCESSOR_ATHLON
:
11070 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11071 by DEP_INSN and nothing set by DEP_INSN. */
11074 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
11075 rtx insn
, dep_insn
;
11076 enum attr_type insn_type
;
11080 /* Simplify the test for uninteresting insns. */
11081 if (insn_type
!= TYPE_SETCC
11082 && insn_type
!= TYPE_ICMOV
11083 && insn_type
!= TYPE_FCMOV
11084 && insn_type
!= TYPE_IBR
)
11087 if ((set
= single_set (dep_insn
)) != 0)
11089 set
= SET_DEST (set
);
11092 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
11093 && XVECLEN (PATTERN (dep_insn
), 0) == 2
11094 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
11095 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
11097 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11098 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11103 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
11106 /* This test is true if the dependent insn reads the flags but
11107 not any other potentially set register. */
11108 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
11111 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
11117 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11118 address with operands set by DEP_INSN. */
11121 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
11122 rtx insn
, dep_insn
;
11123 enum attr_type insn_type
;
11127 if (insn_type
== TYPE_LEA
11130 addr
= PATTERN (insn
);
11131 if (GET_CODE (addr
) == SET
)
11133 else if (GET_CODE (addr
) == PARALLEL
11134 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
11135 addr
= XVECEXP (addr
, 0, 0);
11138 addr
= SET_SRC (addr
);
11143 extract_insn_cached (insn
);
11144 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11145 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11147 addr
= XEXP (recog_data
.operand
[i
], 0);
11154 return modified_in_p (addr
, dep_insn
);
11158 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
11159 rtx insn
, link
, dep_insn
;
11162 enum attr_type insn_type
, dep_insn_type
;
11163 enum attr_memory memory
, dep_memory
;
11165 int dep_insn_code_number
;
11167 /* Anti and output depenancies have zero cost on all CPUs. */
11168 if (REG_NOTE_KIND (link
) != 0)
11171 dep_insn_code_number
= recog_memoized (dep_insn
);
11173 /* If we can't recognize the insns, we can't really do anything. */
11174 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
11177 insn_type
= get_attr_type (insn
);
11178 dep_insn_type
= get_attr_type (dep_insn
);
11182 case PROCESSOR_PENTIUM
:
11183 /* Address Generation Interlock adds a cycle of latency. */
11184 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11187 /* ??? Compares pair with jump/setcc. */
11188 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
11191 /* Floating point stores require value to be ready one cycle ealier. */
11192 if (insn_type
== TYPE_FMOV
11193 && get_attr_memory (insn
) == MEMORY_STORE
11194 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11198 case PROCESSOR_PENTIUMPRO
:
11199 memory
= get_attr_memory (insn
);
11200 dep_memory
= get_attr_memory (dep_insn
);
11202 /* Since we can't represent delayed latencies of load+operation,
11203 increase the cost here for non-imov insns. */
11204 if (dep_insn_type
!= TYPE_IMOV
11205 && dep_insn_type
!= TYPE_FMOV
11206 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
11209 /* INT->FP conversion is expensive. */
11210 if (get_attr_fp_int_src (dep_insn
))
11213 /* There is one cycle extra latency between an FP op and a store. */
11214 if (insn_type
== TYPE_FMOV
11215 && (set
= single_set (dep_insn
)) != NULL_RTX
11216 && (set2
= single_set (insn
)) != NULL_RTX
11217 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
11218 && GET_CODE (SET_DEST (set2
)) == MEM
)
11221 /* Show ability of reorder buffer to hide latency of load by executing
11222 in parallel with previous instruction in case
11223 previous instruction is not needed to compute the address. */
11224 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11225 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11227 /* Claim moves to take one cycle, as core can issue one load
11228 at time and the next load can start cycle later. */
11229 if (dep_insn_type
== TYPE_IMOV
11230 || dep_insn_type
== TYPE_FMOV
)
11238 memory
= get_attr_memory (insn
);
11239 dep_memory
= get_attr_memory (dep_insn
);
11240 /* The esp dependency is resolved before the instruction is really
11242 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
11243 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
11246 /* Since we can't represent delayed latencies of load+operation,
11247 increase the cost here for non-imov insns. */
11248 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11249 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
11251 /* INT->FP conversion is expensive. */
11252 if (get_attr_fp_int_src (dep_insn
))
11255 /* Show ability of reorder buffer to hide latency of load by executing
11256 in parallel with previous instruction in case
11257 previous instruction is not needed to compute the address. */
11258 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11259 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11261 /* Claim moves to take one cycle, as core can issue one load
11262 at time and the next load can start cycle later. */
11263 if (dep_insn_type
== TYPE_IMOV
11264 || dep_insn_type
== TYPE_FMOV
)
11273 case PROCESSOR_ATHLON
:
11274 memory
= get_attr_memory (insn
);
11275 dep_memory
= get_attr_memory (dep_insn
);
11277 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11279 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
11284 /* Show ability of reorder buffer to hide latency of load by executing
11285 in parallel with previous instruction in case
11286 previous instruction is not needed to compute the address. */
11287 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11288 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11290 /* Claim moves to take one cycle, as core can issue one load
11291 at time and the next load can start cycle later. */
11292 if (dep_insn_type
== TYPE_IMOV
11293 || dep_insn_type
== TYPE_FMOV
)
11295 else if (cost
>= 3)
11310 struct ppro_sched_data
11313 int issued_this_cycle
;
11317 static enum attr_ppro_uops
11318 ix86_safe_ppro_uops (insn
)
11321 if (recog_memoized (insn
) >= 0)
11322 return get_attr_ppro_uops (insn
);
11324 return PPRO_UOPS_MANY
;
11328 ix86_dump_ppro_packet (dump
)
11331 if (ix86_sched_data
.ppro
.decode
[0])
11333 fprintf (dump
, "PPRO packet: %d",
11334 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
11335 if (ix86_sched_data
.ppro
.decode
[1])
11336 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
11337 if (ix86_sched_data
.ppro
.decode
[2])
11338 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
11339 fputc ('\n', dump
);
11343 /* We're beginning a new block. Initialize data structures as necessary. */
11346 ix86_sched_init (dump
, sched_verbose
, veclen
)
11347 FILE *dump ATTRIBUTE_UNUSED
;
11348 int sched_verbose ATTRIBUTE_UNUSED
;
11349 int veclen ATTRIBUTE_UNUSED
;
11351 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
11354 /* Shift INSN to SLOT, and shift everything else down. */
11357 ix86_reorder_insn (insnp
, slot
)
11364 insnp
[0] = insnp
[1];
11365 while (++insnp
!= slot
);
11371 ix86_sched_reorder_ppro (ready
, e_ready
)
11376 enum attr_ppro_uops cur_uops
;
11377 int issued_this_cycle
;
11381 /* At this point .ppro.decode contains the state of the three
11382 decoders from last "cycle". That is, those insns that were
11383 actually independent. But here we're scheduling for the
11384 decoder, and we may find things that are decodable in the
11387 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
11388 issued_this_cycle
= 0;
11391 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11393 /* If the decoders are empty, and we've a complex insn at the
11394 head of the priority queue, let it issue without complaint. */
11395 if (decode
[0] == NULL
)
11397 if (cur_uops
== PPRO_UOPS_MANY
)
11399 decode
[0] = *insnp
;
11403 /* Otherwise, search for a 2-4 uop unsn to issue. */
11404 while (cur_uops
!= PPRO_UOPS_FEW
)
11406 if (insnp
== ready
)
11408 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11411 /* If so, move it to the head of the line. */
11412 if (cur_uops
== PPRO_UOPS_FEW
)
11413 ix86_reorder_insn (insnp
, e_ready
);
11415 /* Issue the head of the queue. */
11416 issued_this_cycle
= 1;
11417 decode
[0] = *e_ready
--;
11420 /* Look for simple insns to fill in the other two slots. */
11421 for (i
= 1; i
< 3; ++i
)
11422 if (decode
[i
] == NULL
)
11424 if (ready
> e_ready
)
11428 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11429 while (cur_uops
!= PPRO_UOPS_ONE
)
11431 if (insnp
== ready
)
11433 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11436 /* Found one. Move it to the head of the queue and issue it. */
11437 if (cur_uops
== PPRO_UOPS_ONE
)
11439 ix86_reorder_insn (insnp
, e_ready
);
11440 decode
[i
] = *e_ready
--;
11441 issued_this_cycle
++;
11445 /* ??? Didn't find one. Ideally, here we would do a lazy split
11446 of 2-uop insns, issue one and queue the other. */
11450 if (issued_this_cycle
== 0)
11451 issued_this_cycle
= 1;
11452 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
11455 /* We are about to being issuing insns for this clock cycle.
11456 Override the default sort algorithm to better slot instructions. */
11458 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
11459 FILE *dump ATTRIBUTE_UNUSED
;
11460 int sched_verbose ATTRIBUTE_UNUSED
;
11463 int clock_var ATTRIBUTE_UNUSED
;
11465 int n_ready
= *n_readyp
;
11466 rtx
*e_ready
= ready
+ n_ready
- 1;
11468 /* Make sure to go ahead and initialize key items in
11469 ix86_sched_data if we are not going to bother trying to
11470 reorder the ready queue. */
11473 ix86_sched_data
.ppro
.issued_this_cycle
= 1;
11482 case PROCESSOR_PENTIUMPRO
:
11483 ix86_sched_reorder_ppro (ready
, e_ready
);
11488 return ix86_issue_rate ();
11491 /* We are about to issue INSN. Return the number of insns left on the
11492 ready queue that can be issued this cycle. */
11495 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
11499 int can_issue_more
;
11505 return can_issue_more
- 1;
11507 case PROCESSOR_PENTIUMPRO
:
11509 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
11511 if (uops
== PPRO_UOPS_MANY
)
11514 ix86_dump_ppro_packet (dump
);
11515 ix86_sched_data
.ppro
.decode
[0] = insn
;
11516 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11517 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11519 ix86_dump_ppro_packet (dump
);
11520 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11522 else if (uops
== PPRO_UOPS_FEW
)
11525 ix86_dump_ppro_packet (dump
);
11526 ix86_sched_data
.ppro
.decode
[0] = insn
;
11527 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11528 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11532 for (i
= 0; i
< 3; ++i
)
11533 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
11535 ix86_sched_data
.ppro
.decode
[i
] = insn
;
11543 ix86_dump_ppro_packet (dump
);
11544 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11545 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11546 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11550 return --ix86_sched_data
.ppro
.issued_this_cycle
;
11555 ia32_use_dfa_pipeline_interface ()
11557 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11562 /* How many alternative schedules to try. This should be as wide as the
11563 scheduling freedom in the DFA, but no wider. Making this value too
11564 large results extra work for the scheduler. */
11567 ia32_multipass_dfa_lookahead ()
11569 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11576 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11577 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11581 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
11583 rtx dstref
, srcref
, dstreg
, srcreg
;
11587 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
11589 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
11593 /* Subroutine of above to actually do the updating by recursively walking
11597 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
11599 rtx dstref
, srcref
, dstreg
, srcreg
;
11601 enum rtx_code code
= GET_CODE (x
);
11602 const char *format_ptr
= GET_RTX_FORMAT (code
);
11605 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
11606 MEM_COPY_ATTRIBUTES (x
, dstref
);
11607 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
11608 MEM_COPY_ATTRIBUTES (x
, srcref
);
11610 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
11612 if (*format_ptr
== 'e')
11613 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
11615 else if (*format_ptr
== 'E')
11616 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
11617 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
11622 /* Compute the alignment given to a constant that is being placed in memory.
11623 EXP is the constant and ALIGN is the alignment that the object would
11625 The value of this function is used instead of that alignment to align
11629 ix86_constant_alignment (exp
, align
)
11633 if (TREE_CODE (exp
) == REAL_CST
)
11635 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11637 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11640 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
11647 /* Compute the alignment for a static variable.
11648 TYPE is the data type, and ALIGN is the alignment that
11649 the object would ordinarily have. The value of this function is used
11650 instead of that alignment to align the object. */
11653 ix86_data_alignment (type
, align
)
11657 if (AGGREGATE_TYPE_P (type
)
11658 && TYPE_SIZE (type
)
11659 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11660 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11661 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11664 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11665 to 16byte boundary. */
11668 if (AGGREGATE_TYPE_P (type
)
11669 && TYPE_SIZE (type
)
11670 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11671 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11672 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11676 if (TREE_CODE (type
) == ARRAY_TYPE
)
11678 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11680 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11683 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11686 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11688 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11691 else if ((TREE_CODE (type
) == RECORD_TYPE
11692 || TREE_CODE (type
) == UNION_TYPE
11693 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11694 && TYPE_FIELDS (type
))
11696 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11698 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11701 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11702 || TREE_CODE (type
) == INTEGER_TYPE
)
11704 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11706 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11713 /* Compute the alignment for a local variable.
11714 TYPE is the data type, and ALIGN is the alignment that
11715 the object would ordinarily have. The value of this macro is used
11716 instead of that alignment to align the object. */
11719 ix86_local_alignment (type
, align
)
11723 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11724 to 16byte boundary. */
11727 if (AGGREGATE_TYPE_P (type
)
11728 && TYPE_SIZE (type
)
11729 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11730 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
11731 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11734 if (TREE_CODE (type
) == ARRAY_TYPE
)
11736 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11738 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11741 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11743 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11745 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11748 else if ((TREE_CODE (type
) == RECORD_TYPE
11749 || TREE_CODE (type
) == UNION_TYPE
11750 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11751 && TYPE_FIELDS (type
))
11753 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11755 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11758 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11759 || TREE_CODE (type
) == INTEGER_TYPE
)
11762 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11764 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11770 /* Emit RTL insns to initialize the variable parts of a trampoline.
11771 FNADDR is an RTX for the address of the function's pure code.
11772 CXT is an RTX for the static chain value for the function. */
11774 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
11775 rtx tramp
, fnaddr
, cxt
;
11779 /* Compute offset from the end of the jmp to the target function. */
11780 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
11781 plus_constant (tramp
, 10),
11782 NULL_RTX
, 1, OPTAB_DIRECT
);
11783 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
11784 gen_int_mode (0xb9, QImode
));
11785 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
11786 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
11787 gen_int_mode (0xe9, QImode
));
11788 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
11793 /* Try to load address using shorter movl instead of movabs.
11794 We may want to support movq for kernel mode, but kernel does not use
11795 trampolines at the moment. */
11796 if (x86_64_zero_extended_value (fnaddr
))
11798 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
11799 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11800 gen_int_mode (0xbb41, HImode
));
11801 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
11802 gen_lowpart (SImode
, fnaddr
));
11807 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11808 gen_int_mode (0xbb49, HImode
));
11809 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11813 /* Load static chain using movabs to r10. */
11814 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11815 gen_int_mode (0xba49, HImode
));
11816 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11819 /* Jump to the r11 */
11820 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11821 gen_int_mode (0xff49, HImode
));
11822 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
11823 gen_int_mode (0xe3, QImode
));
11825 if (offset
> TRAMPOLINE_SIZE
)
11829 #ifdef TRANSFER_FROM_TRAMPOLINE
11830 emit_library_call (gen_rtx (SYMBOL_REF
, Pmode
, "__enable_execute_stack"),
11831 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
/* Register builtin NAME with function type TYPE and builtin code CODE,
   but only when one of the target flags in MASK is enabled.  Wrapped in
   do/while (0) so it behaves as a single statement at the use site.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
11842 struct builtin_description
11844 const unsigned int mask
;
11845 const enum insn_code icode
;
11846 const char *const name
;
11847 const enum ix86_builtins code
;
11848 const enum rtx_code comparison
;
11849 const unsigned int flag
;
11852 /* Used for builtins that are enabled both by -msse and -msse2. */
11853 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11855 static const struct builtin_description bdesc_comi
[] =
11857 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
11858 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
11859 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
11860 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
11861 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
11862 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
11863 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
11864 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
11865 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
11866 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
11867 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
11868 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
11869 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
11870 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
11871 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
11872 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
11873 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
11874 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
11875 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
11876 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
11877 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
11878 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
11879 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
11880 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
11883 static const struct builtin_description bdesc_2arg
[] =
11886 { MASK_SSE1
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
11887 { MASK_SSE1
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
11888 { MASK_SSE1
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
11889 { MASK_SSE1
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
11890 { MASK_SSE1
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
11891 { MASK_SSE1
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
11892 { MASK_SSE1
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
11893 { MASK_SSE1
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
11895 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
11896 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
11897 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
11898 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
11899 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
11900 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
11901 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
11902 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
11903 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
11904 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
11905 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
11906 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
11907 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
11908 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
11909 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
11910 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
11911 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
11912 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
11913 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
11914 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
11916 { MASK_SSE1
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
11917 { MASK_SSE1
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
11918 { MASK_SSE1
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
11919 { MASK_SSE1
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
11921 { MASK_SSE1
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
11922 { MASK_SSE1
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
11923 { MASK_SSE1
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
11924 { MASK_SSE1
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
11926 { MASK_SSE1
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
11927 { MASK_SSE1
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
11928 { MASK_SSE1
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
11929 { MASK_SSE1
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
11930 { MASK_SSE1
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
11933 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
11934 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
11935 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
11936 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
11937 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
11938 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
11940 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
11941 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11942 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11943 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11944 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11945 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11946 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11947 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11949 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11950 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11951 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11953 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11954 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11955 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11956 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11958 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11959 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11961 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11962 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11963 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11964 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11965 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11966 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11968 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
11969 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
11970 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
11971 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
11973 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
11974 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
11975 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
11976 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
11977 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
11978 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
11981 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
11982 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
11983 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
11985 { MASK_SSE1
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
11986 { MASK_SSE1
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
11988 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
11989 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
11990 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
11991 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
11992 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
11993 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
11995 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
11996 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
11997 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
11998 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
11999 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12000 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12002 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12003 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12004 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12005 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12007 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12008 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12011 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12012 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12013 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12014 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12015 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12016 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12017 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12018 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12020 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12021 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12022 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12023 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12024 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12025 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12026 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12027 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12028 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12029 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12030 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12031 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12032 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12033 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12034 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12035 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12036 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12037 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12038 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12039 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12041 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12042 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12043 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12044 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12046 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12047 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12048 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12049 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12051 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12052 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12053 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12056 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12057 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12058 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12059 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12060 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12061 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12062 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12063 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12065 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12066 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12067 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12068 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12069 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12070 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12071 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12072 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12074 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12075 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12076 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
12077 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12079 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12080 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12081 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12082 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12084 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12085 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12087 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12088 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12089 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12090 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12091 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12092 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12094 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12095 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12096 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12097 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12099 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12100 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12101 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12102 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12103 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12104 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12105 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12106 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12108 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12109 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12110 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12112 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12113 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12115 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12116 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12117 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12118 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12119 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12120 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12122 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12123 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12124 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12125 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12126 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12127 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12129 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12130 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12131 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12132 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12134 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12136 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12137 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12138 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 }
12141 static const struct builtin_description bdesc_1arg
[] =
12143 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12144 { MASK_SSE1
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12146 { MASK_SSE1
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12147 { MASK_SSE1
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12148 { MASK_SSE1
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12150 { MASK_SSE1
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12151 { MASK_SSE1
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12152 { MASK_SSE1
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12153 { MASK_SSE1
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12155 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12156 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12157 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12158 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
12160 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12162 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12163 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12165 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12166 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12167 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12168 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12169 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12171 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12173 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12174 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12176 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12177 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12178 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12180 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 }
12184 ix86_init_builtins ()
12187 ix86_init_mmx_sse_builtins ();
12190 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12191 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12194 ix86_init_mmx_sse_builtins ()
12196 const struct builtin_description
* d
;
12199 tree pchar_type_node
= build_pointer_type (char_type_node
);
12200 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12201 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12202 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12203 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12206 tree int_ftype_v4sf_v4sf
12207 = build_function_type_list (integer_type_node
,
12208 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12209 tree v4si_ftype_v4sf_v4sf
12210 = build_function_type_list (V4SI_type_node
,
12211 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12212 /* MMX/SSE/integer conversions. */
12213 tree int_ftype_v4sf
12214 = build_function_type_list (integer_type_node
,
12215 V4SF_type_node
, NULL_TREE
);
12216 tree int_ftype_v8qi
12217 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12218 tree v4sf_ftype_v4sf_int
12219 = build_function_type_list (V4SF_type_node
,
12220 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12221 tree v4sf_ftype_v4sf_v2si
12222 = build_function_type_list (V4SF_type_node
,
12223 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12224 tree int_ftype_v4hi_int
12225 = build_function_type_list (integer_type_node
,
12226 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12227 tree v4hi_ftype_v4hi_int_int
12228 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12229 integer_type_node
, integer_type_node
,
12231 /* Miscellaneous. */
12232 tree v8qi_ftype_v4hi_v4hi
12233 = build_function_type_list (V8QI_type_node
,
12234 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12235 tree v4hi_ftype_v2si_v2si
12236 = build_function_type_list (V4HI_type_node
,
12237 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12238 tree v4sf_ftype_v4sf_v4sf_int
12239 = build_function_type_list (V4SF_type_node
,
12240 V4SF_type_node
, V4SF_type_node
,
12241 integer_type_node
, NULL_TREE
);
12242 tree v2si_ftype_v4hi_v4hi
12243 = build_function_type_list (V2SI_type_node
,
12244 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12245 tree v4hi_ftype_v4hi_int
12246 = build_function_type_list (V4HI_type_node
,
12247 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12248 tree v4hi_ftype_v4hi_di
12249 = build_function_type_list (V4HI_type_node
,
12250 V4HI_type_node
, long_long_unsigned_type_node
,
12252 tree v2si_ftype_v2si_di
12253 = build_function_type_list (V2SI_type_node
,
12254 V2SI_type_node
, long_long_unsigned_type_node
,
12256 tree void_ftype_void
12257 = build_function_type (void_type_node
, void_list_node
);
12258 tree void_ftype_unsigned
12259 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12260 tree unsigned_ftype_void
12261 = build_function_type (unsigned_type_node
, void_list_node
);
12263 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12264 tree v4sf_ftype_void
12265 = build_function_type (V4SF_type_node
, void_list_node
);
12266 tree v2si_ftype_v4sf
12267 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12268 /* Loads/stores. */
12269 tree void_ftype_v8qi_v8qi_pchar
12270 = build_function_type_list (void_type_node
,
12271 V8QI_type_node
, V8QI_type_node
,
12272 pchar_type_node
, NULL_TREE
);
12273 tree v4sf_ftype_pfloat
12274 = build_function_type_list (V4SF_type_node
, pfloat_type_node
, NULL_TREE
);
12275 /* @@@ the type is bogus */
12276 tree v4sf_ftype_v4sf_pv2si
12277 = build_function_type_list (V4SF_type_node
,
12278 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12279 tree void_ftype_pv2si_v4sf
12280 = build_function_type_list (void_type_node
,
12281 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12282 tree void_ftype_pfloat_v4sf
12283 = build_function_type_list (void_type_node
,
12284 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12285 tree void_ftype_pdi_di
12286 = build_function_type_list (void_type_node
,
12287 pdi_type_node
, long_long_unsigned_type_node
,
12289 tree void_ftype_pv2di_v2di
12290 = build_function_type_list (void_type_node
,
12291 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12292 /* Normal vector unops. */
12293 tree v4sf_ftype_v4sf
12294 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12296 /* Normal vector binops. */
12297 tree v4sf_ftype_v4sf_v4sf
12298 = build_function_type_list (V4SF_type_node
,
12299 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12300 tree v8qi_ftype_v8qi_v8qi
12301 = build_function_type_list (V8QI_type_node
,
12302 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12303 tree v4hi_ftype_v4hi_v4hi
12304 = build_function_type_list (V4HI_type_node
,
12305 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12306 tree v2si_ftype_v2si_v2si
12307 = build_function_type_list (V2SI_type_node
,
12308 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12309 tree di_ftype_di_di
12310 = build_function_type_list (long_long_unsigned_type_node
,
12311 long_long_unsigned_type_node
,
12312 long_long_unsigned_type_node
, NULL_TREE
);
12314 tree v2si_ftype_v2sf
12315 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12316 tree v2sf_ftype_v2si
12317 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12318 tree v2si_ftype_v2si
12319 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12320 tree v2sf_ftype_v2sf
12321 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12322 tree v2sf_ftype_v2sf_v2sf
12323 = build_function_type_list (V2SF_type_node
,
12324 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12325 tree v2si_ftype_v2sf_v2sf
12326 = build_function_type_list (V2SI_type_node
,
12327 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12328 tree pint_type_node
= build_pointer_type (integer_type_node
);
12329 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12330 tree int_ftype_v2df_v2df
12331 = build_function_type_list (integer_type_node
,
12332 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12335 = build_function_type (intTI_type_node
, void_list_node
);
12336 tree v2di_ftype_void
12337 = build_function_type (V2DI_type_node
, void_list_node
);
12338 tree ti_ftype_ti_ti
12339 = build_function_type_list (intTI_type_node
,
12340 intTI_type_node
, intTI_type_node
, NULL_TREE
);
12341 tree void_ftype_pvoid
12342 = build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
12344 = build_function_type_list (V2DI_type_node
,
12345 long_long_unsigned_type_node
, NULL_TREE
);
12347 = build_function_type_list (long_long_unsigned_type_node
,
12348 V2DI_type_node
, NULL_TREE
);
12349 tree v4sf_ftype_v4si
12350 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
12351 tree v4si_ftype_v4sf
12352 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
12353 tree v2df_ftype_v4si
12354 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
12355 tree v4si_ftype_v2df
12356 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
12357 tree v2si_ftype_v2df
12358 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
12359 tree v4sf_ftype_v2df
12360 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12361 tree v2df_ftype_v2si
12362 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
12363 tree v2df_ftype_v4sf
12364 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12365 tree int_ftype_v2df
12366 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
12367 tree v2df_ftype_v2df_int
12368 = build_function_type_list (V2DF_type_node
,
12369 V2DF_type_node
, integer_type_node
, NULL_TREE
);
12370 tree v4sf_ftype_v4sf_v2df
12371 = build_function_type_list (V4SF_type_node
,
12372 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12373 tree v2df_ftype_v2df_v4sf
12374 = build_function_type_list (V2DF_type_node
,
12375 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12376 tree v2df_ftype_v2df_v2df_int
12377 = build_function_type_list (V2DF_type_node
,
12378 V2DF_type_node
, V2DF_type_node
,
12381 tree v2df_ftype_v2df_pv2si
12382 = build_function_type_list (V2DF_type_node
,
12383 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
12384 tree void_ftype_pv2si_v2df
12385 = build_function_type_list (void_type_node
,
12386 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
12387 tree void_ftype_pdouble_v2df
12388 = build_function_type_list (void_type_node
,
12389 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
12390 tree void_ftype_pint_int
12391 = build_function_type_list (void_type_node
,
12392 pint_type_node
, integer_type_node
, NULL_TREE
);
12393 tree void_ftype_v16qi_v16qi_pchar
12394 = build_function_type_list (void_type_node
,
12395 V16QI_type_node
, V16QI_type_node
,
12396 pchar_type_node
, NULL_TREE
);
12397 tree v2df_ftype_pdouble
12398 = build_function_type_list (V2DF_type_node
, pdouble_type_node
, NULL_TREE
);
12399 tree v2df_ftype_v2df_v2df
12400 = build_function_type_list (V2DF_type_node
,
12401 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12402 tree v16qi_ftype_v16qi_v16qi
12403 = build_function_type_list (V16QI_type_node
,
12404 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12405 tree v8hi_ftype_v8hi_v8hi
12406 = build_function_type_list (V8HI_type_node
,
12407 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12408 tree v4si_ftype_v4si_v4si
12409 = build_function_type_list (V4SI_type_node
,
12410 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12411 tree v2di_ftype_v2di_v2di
12412 = build_function_type_list (V2DI_type_node
,
12413 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12414 tree v2di_ftype_v2df_v2df
12415 = build_function_type_list (V2DI_type_node
,
12416 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12417 tree v2df_ftype_v2df
12418 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12419 tree v2df_ftype_double
12420 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
12421 tree v2df_ftype_double_double
12422 = build_function_type_list (V2DF_type_node
,
12423 double_type_node
, double_type_node
, NULL_TREE
);
12424 tree int_ftype_v8hi_int
12425 = build_function_type_list (integer_type_node
,
12426 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12427 tree v8hi_ftype_v8hi_int_int
12428 = build_function_type_list (V8HI_type_node
,
12429 V8HI_type_node
, integer_type_node
,
12430 integer_type_node
, NULL_TREE
);
12431 tree v2di_ftype_v2di_int
12432 = build_function_type_list (V2DI_type_node
,
12433 V2DI_type_node
, integer_type_node
, NULL_TREE
);
12434 tree v4si_ftype_v4si_int
12435 = build_function_type_list (V4SI_type_node
,
12436 V4SI_type_node
, integer_type_node
, NULL_TREE
);
12437 tree v8hi_ftype_v8hi_int
12438 = build_function_type_list (V8HI_type_node
,
12439 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12440 tree v8hi_ftype_v8hi_v2di
12441 = build_function_type_list (V8HI_type_node
,
12442 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
12443 tree v4si_ftype_v4si_v2di
12444 = build_function_type_list (V4SI_type_node
,
12445 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
12446 tree v4si_ftype_v8hi_v8hi
12447 = build_function_type_list (V4SI_type_node
,
12448 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12449 tree di_ftype_v8qi_v8qi
12450 = build_function_type_list (long_long_unsigned_type_node
,
12451 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12452 tree v2di_ftype_v16qi_v16qi
12453 = build_function_type_list (V2DI_type_node
,
12454 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12455 tree int_ftype_v16qi
12456 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
12457 tree v16qi_ftype_pchar
12458 = build_function_type_list (V16QI_type_node
, pchar_type_node
, NULL_TREE
);
12459 tree void_ftype_pchar_v16qi
12460 = build_function_type_list (void_type_node
,
12461 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
12462 tree v4si_ftype_pchar
12463 = build_function_type_list (V4SI_type_node
, pchar_type_node
, NULL_TREE
);
12464 tree void_ftype_pchar_v4si
12465 = build_function_type_list (void_type_node
,
12466 pchar_type_node
, V4SI_type_node
, NULL_TREE
);
12467 tree v2di_ftype_v2di
12468 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12470 /* Add all builtins that are more or less simple operations on two
12472 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12474 /* Use one of the operands; the target can have a different mode for
12475 mask-generating compares. */
12476 enum machine_mode mode
;
12481 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12486 type
= v16qi_ftype_v16qi_v16qi
;
12489 type
= v8hi_ftype_v8hi_v8hi
;
12492 type
= v4si_ftype_v4si_v4si
;
12495 type
= v2di_ftype_v2di_v2di
;
12498 type
= v2df_ftype_v2df_v2df
;
12501 type
= ti_ftype_ti_ti
;
12504 type
= v4sf_ftype_v4sf_v4sf
;
12507 type
= v8qi_ftype_v8qi_v8qi
;
12510 type
= v4hi_ftype_v4hi_v4hi
;
12513 type
= v2si_ftype_v2si_v2si
;
12516 type
= di_ftype_di_di
;
12523 /* Override for comparisons. */
12524 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12525 || d
->icode
== CODE_FOR_maskncmpv4sf3
12526 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12527 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12528 type
= v4si_ftype_v4sf_v4sf
;
12530 if (d
->icode
== CODE_FOR_maskcmpv2df3
12531 || d
->icode
== CODE_FOR_maskncmpv2df3
12532 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12533 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12534 type
= v2di_ftype_v2df_v2df
;
12536 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12539 /* Add the remaining MMX insns with somewhat more complicated types. */
12540 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12541 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12542 def_builtin (MASK_MMX
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12543 def_builtin (MASK_MMX
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12544 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12545 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12546 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12548 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12549 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12550 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12552 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12553 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12555 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12556 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12558 /* comi/ucomi insns. */
12559 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12560 if (d
->mask
== MASK_SSE2
)
12561 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12563 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12565 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12566 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12567 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12569 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12570 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12571 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12572 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12573 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12574 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12576 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12577 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12579 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12581 def_builtin (MASK_SSE1
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
12582 def_builtin (MASK_SSE1
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
12583 def_builtin (MASK_SSE1
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
12584 def_builtin (MASK_SSE1
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12585 def_builtin (MASK_SSE1
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12586 def_builtin (MASK_SSE1
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12588 def_builtin (MASK_SSE1
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12589 def_builtin (MASK_SSE1
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12590 def_builtin (MASK_SSE1
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12591 def_builtin (MASK_SSE1
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12593 def_builtin (MASK_SSE1
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12594 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12595 def_builtin (MASK_SSE1
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12596 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12598 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12600 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12602 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12603 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12604 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12605 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12606 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12607 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12609 def_builtin (MASK_SSE1
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12611 /* Original 3DNow! */
12612 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12613 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12614 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12615 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12616 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12617 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12618 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12619 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12620 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12621 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12622 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12623 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12624 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12625 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12626 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12627 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12628 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12629 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12630 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12631 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12633 /* 3DNow! extension as used in the Athlon CPU. */
12634 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
12635 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
12636 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
12637 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
12638 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
12639 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
12641 def_builtin (MASK_SSE1
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
12644 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
12645 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
12647 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
12648 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
12649 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
12651 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADAPD
);
12652 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADUPD
);
12653 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADSD
);
12654 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
12655 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
12656 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
12658 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
12659 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
12660 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
12661 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
12663 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
12664 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
12665 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
12666 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
12667 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
12669 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
12670 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
12671 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
12672 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
12674 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
12675 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
12677 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
12679 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
12680 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
12682 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
12683 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
12684 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
12685 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
12686 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
12688 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
12690 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
12691 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
12693 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
12694 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
12695 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
12697 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
12698 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
12699 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
12701 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
12702 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
12703 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
12704 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pdouble
, IX86_BUILTIN_LOADPD1
);
12705 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADRPD
);
12706 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
12707 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
12709 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pvoid
, IX86_BUILTIN_CLFLUSH
);
12710 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
12711 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
12713 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pchar
, IX86_BUILTIN_LOADDQA
);
12714 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pchar
, IX86_BUILTIN_LOADDQU
);
12715 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pchar
, IX86_BUILTIN_LOADD
);
12716 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
12717 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
12718 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pchar_v4si
, IX86_BUILTIN_STORED
);
12719 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
12721 def_builtin (MASK_SSE1
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
12723 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
12724 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
12725 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
12727 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
12728 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
12729 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
12731 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
12732 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
12734 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
12735 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
12736 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
12737 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
12739 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
12740 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
12741 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
12742 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
12744 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
12745 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
12747 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
12750 /* Errors in the source file can cause expand_expr to return const0_rtx
12751 where we expect a vector. To avoid crashing, use one of the vector
12752 clear instructions. */
12754 safe_vector_operand (x
, mode
)
12756 enum machine_mode mode
;
12758 if (x
!= const0_rtx
)
12760 x
= gen_reg_rtx (mode
);
12762 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
12763 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
12764 : gen_rtx_SUBREG (DImode
, x
, 0)));
12766 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
12767 : gen_rtx_SUBREG (V4SFmode
, x
, 0)));
12771 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12774 ix86_expand_binop_builtin (icode
, arglist
, target
)
12775 enum insn_code icode
;
12780 tree arg0
= TREE_VALUE (arglist
);
12781 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12782 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12783 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12784 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12785 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12786 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
12788 if (VECTOR_MODE_P (mode0
))
12789 op0
= safe_vector_operand (op0
, mode0
);
12790 if (VECTOR_MODE_P (mode1
))
12791 op1
= safe_vector_operand (op1
, mode1
);
12794 || GET_MODE (target
) != tmode
12795 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12796 target
= gen_reg_rtx (tmode
);
12798 /* In case the insn wants input operands in modes different from
12799 the result, abort. */
12800 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
12803 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12804 op0
= copy_to_mode_reg (mode0
, op0
);
12805 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12806 op1
= copy_to_mode_reg (mode1
, op1
);
12808 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12809 yet one of the two must not be a memory. This is normally enforced
12810 by expanders, but we didn't bother to create one here. */
12811 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12812 op0
= copy_to_mode_reg (mode0
, op0
);
12814 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12821 /* Subroutine of ix86_expand_builtin to take care of stores. */
12824 ix86_expand_store_builtin (icode
, arglist
)
12825 enum insn_code icode
;
12829 tree arg0
= TREE_VALUE (arglist
);
12830 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12831 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12832 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12833 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
12834 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
12836 if (VECTOR_MODE_P (mode1
))
12837 op1
= safe_vector_operand (op1
, mode1
);
12839 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12841 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
12842 op1
= copy_to_mode_reg (mode1
, op1
);
12844 pat
= GEN_FCN (icode
) (op0
, op1
);
12850 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12853 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
12854 enum insn_code icode
;
12860 tree arg0
= TREE_VALUE (arglist
);
12861 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12862 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12863 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12866 || GET_MODE (target
) != tmode
12867 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12868 target
= gen_reg_rtx (tmode
);
12870 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12873 if (VECTOR_MODE_P (mode0
))
12874 op0
= safe_vector_operand (op0
, mode0
);
12876 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12877 op0
= copy_to_mode_reg (mode0
, op0
);
12880 pat
= GEN_FCN (icode
) (target
, op0
);
12887 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12888 sqrtss, rsqrtss, rcpss. */
12891 ix86_expand_unop1_builtin (icode
, arglist
, target
)
12892 enum insn_code icode
;
12897 tree arg0
= TREE_VALUE (arglist
);
12898 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12899 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12900 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12903 || GET_MODE (target
) != tmode
12904 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12905 target
= gen_reg_rtx (tmode
);
12907 if (VECTOR_MODE_P (mode0
))
12908 op0
= safe_vector_operand (op0
, mode0
);
12910 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12911 op0
= copy_to_mode_reg (mode0
, op0
);
12914 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
12915 op1
= copy_to_mode_reg (mode0
, op1
);
12917 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12924 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12927 ix86_expand_sse_compare (d
, arglist
, target
)
12928 const struct builtin_description
*d
;
12933 tree arg0
= TREE_VALUE (arglist
);
12934 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12935 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12936 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12938 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
12939 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
12940 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
12941 enum rtx_code comparison
= d
->comparison
;
12943 if (VECTOR_MODE_P (mode0
))
12944 op0
= safe_vector_operand (op0
, mode0
);
12945 if (VECTOR_MODE_P (mode1
))
12946 op1
= safe_vector_operand (op1
, mode1
);
12948 /* Swap operands if we have a comparison that isn't available in
12952 rtx tmp
= gen_reg_rtx (mode1
);
12953 emit_move_insn (tmp
, op1
);
12959 || GET_MODE (target
) != tmode
12960 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
12961 target
= gen_reg_rtx (tmode
);
12963 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
12964 op0
= copy_to_mode_reg (mode0
, op0
);
12965 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
12966 op1
= copy_to_mode_reg (mode1
, op1
);
12968 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12969 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
12976 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12979 ix86_expand_sse_comi (d
, arglist
, target
)
12980 const struct builtin_description
*d
;
12985 tree arg0
= TREE_VALUE (arglist
);
12986 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12987 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12988 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12990 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
12991 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
12992 enum rtx_code comparison
= d
->comparison
;
12994 if (VECTOR_MODE_P (mode0
))
12995 op0
= safe_vector_operand (op0
, mode0
);
12996 if (VECTOR_MODE_P (mode1
))
12997 op1
= safe_vector_operand (op1
, mode1
);
12999 /* Swap operands if we have a comparison that isn't available in
13008 target
= gen_reg_rtx (SImode
);
13009 emit_move_insn (target
, const0_rtx
);
13010 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13012 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13013 op0
= copy_to_mode_reg (mode0
, op0
);
13014 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13015 op1
= copy_to_mode_reg (mode1
, op1
);
13017 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13018 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13022 emit_insn (gen_rtx_SET (VOIDmode
,
13023 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13024 gen_rtx_fmt_ee (comparison
, QImode
,
13028 return SUBREG_REG (target
);
13031 /* Expand an expression EXP that calls a built-in function,
13032 with result going to TARGET if that's convenient
13033 (and in mode MODE if that's convenient).
13034 SUBTARGET may be used as the target for computing one of EXP's operands.
13035 IGNORE is nonzero if the value is to be ignored. */
13038 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
13041 rtx subtarget ATTRIBUTE_UNUSED
;
13042 enum machine_mode mode ATTRIBUTE_UNUSED
;
13043 int ignore ATTRIBUTE_UNUSED
;
13045 const struct builtin_description
*d
;
13047 enum insn_code icode
;
13048 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
13049 tree arglist
= TREE_OPERAND (exp
, 1);
13050 tree arg0
, arg1
, arg2
;
13051 rtx op0
, op1
, op2
, pat
;
13052 enum machine_mode tmode
, mode0
, mode1
, mode2
;
13053 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
13057 case IX86_BUILTIN_EMMS
:
13058 emit_insn (gen_emms ());
13061 case IX86_BUILTIN_SFENCE
:
13062 emit_insn (gen_sfence ());
13065 case IX86_BUILTIN_PEXTRW
:
13066 case IX86_BUILTIN_PEXTRW128
:
13067 icode
= (fcode
== IX86_BUILTIN_PEXTRW
13068 ? CODE_FOR_mmx_pextrw
13069 : CODE_FOR_sse2_pextrw
);
13070 arg0
= TREE_VALUE (arglist
);
13071 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13072 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13073 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13074 tmode
= insn_data
[icode
].operand
[0].mode
;
13075 mode0
= insn_data
[icode
].operand
[1].mode
;
13076 mode1
= insn_data
[icode
].operand
[2].mode
;
13078 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13079 op0
= copy_to_mode_reg (mode0
, op0
);
13080 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13082 /* @@@ better error message */
13083 error ("selector must be an immediate");
13084 return gen_reg_rtx (tmode
);
13087 || GET_MODE (target
) != tmode
13088 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13089 target
= gen_reg_rtx (tmode
);
13090 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13096 case IX86_BUILTIN_PINSRW
:
13097 case IX86_BUILTIN_PINSRW128
:
13098 icode
= (fcode
== IX86_BUILTIN_PINSRW
13099 ? CODE_FOR_mmx_pinsrw
13100 : CODE_FOR_sse2_pinsrw
);
13101 arg0
= TREE_VALUE (arglist
);
13102 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13103 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13104 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13105 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13106 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13107 tmode
= insn_data
[icode
].operand
[0].mode
;
13108 mode0
= insn_data
[icode
].operand
[1].mode
;
13109 mode1
= insn_data
[icode
].operand
[2].mode
;
13110 mode2
= insn_data
[icode
].operand
[3].mode
;
13112 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13113 op0
= copy_to_mode_reg (mode0
, op0
);
13114 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13115 op1
= copy_to_mode_reg (mode1
, op1
);
13116 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13118 /* @@@ better error message */
13119 error ("selector must be an immediate");
13123 || GET_MODE (target
) != tmode
13124 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13125 target
= gen_reg_rtx (tmode
);
13126 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13132 case IX86_BUILTIN_MASKMOVQ
:
13133 case IX86_BUILTIN_MASKMOVDQU
:
13134 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
13135 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
13136 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
13137 : CODE_FOR_sse2_maskmovdqu
));
13138 /* Note the arg order is different from the operand order. */
13139 arg1
= TREE_VALUE (arglist
);
13140 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13141 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13142 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13143 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13144 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13145 mode0
= insn_data
[icode
].operand
[0].mode
;
13146 mode1
= insn_data
[icode
].operand
[1].mode
;
13147 mode2
= insn_data
[icode
].operand
[2].mode
;
13149 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13150 op0
= copy_to_mode_reg (mode0
, op0
);
13151 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13152 op1
= copy_to_mode_reg (mode1
, op1
);
13153 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13154 op2
= copy_to_mode_reg (mode2
, op2
);
13155 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
13161 case IX86_BUILTIN_SQRTSS
:
13162 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
13163 case IX86_BUILTIN_RSQRTSS
:
13164 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
13165 case IX86_BUILTIN_RCPSS
:
13166 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
13168 case IX86_BUILTIN_LOADAPS
:
13169 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
13171 case IX86_BUILTIN_LOADUPS
:
13172 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13174 case IX86_BUILTIN_STOREAPS
:
13175 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
13177 case IX86_BUILTIN_STOREUPS
:
13178 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13180 case IX86_BUILTIN_LOADSS
:
13181 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13183 case IX86_BUILTIN_STORESS
:
13184 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
13186 case IX86_BUILTIN_LOADHPS
:
13187 case IX86_BUILTIN_LOADLPS
:
13188 case IX86_BUILTIN_LOADHPD
:
13189 case IX86_BUILTIN_LOADLPD
:
13190 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
13191 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
13192 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
13193 : CODE_FOR_sse2_movlpd
);
13194 arg0
= TREE_VALUE (arglist
);
13195 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13196 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13197 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13198 tmode
= insn_data
[icode
].operand
[0].mode
;
13199 mode0
= insn_data
[icode
].operand
[1].mode
;
13200 mode1
= insn_data
[icode
].operand
[2].mode
;
13202 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13203 op0
= copy_to_mode_reg (mode0
, op0
);
13204 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13206 || GET_MODE (target
) != tmode
13207 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13208 target
= gen_reg_rtx (tmode
);
13209 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13215 case IX86_BUILTIN_STOREHPS
:
13216 case IX86_BUILTIN_STORELPS
:
13217 case IX86_BUILTIN_STOREHPD
:
13218 case IX86_BUILTIN_STORELPD
:
13219 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
13220 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
13221 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
13222 : CODE_FOR_sse2_movlpd
);
13223 arg0
= TREE_VALUE (arglist
);
13224 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13225 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13226 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13227 mode0
= insn_data
[icode
].operand
[1].mode
;
13228 mode1
= insn_data
[icode
].operand
[2].mode
;
13230 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13231 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13232 op1
= copy_to_mode_reg (mode1
, op1
);
13234 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
13240 case IX86_BUILTIN_MOVNTPS
:
13241 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13242 case IX86_BUILTIN_MOVNTQ
:
13243 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
13245 case IX86_BUILTIN_LDMXCSR
:
13246 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13247 target
= assign_386_stack_local (SImode
, 0);
13248 emit_move_insn (target
, op0
);
13249 emit_insn (gen_ldmxcsr (target
));
13252 case IX86_BUILTIN_STMXCSR
:
13253 target
= assign_386_stack_local (SImode
, 0);
13254 emit_insn (gen_stmxcsr (target
));
13255 return copy_to_mode_reg (SImode
, target
);
13257 case IX86_BUILTIN_SHUFPS
:
13258 case IX86_BUILTIN_SHUFPD
:
13259 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13260 ? CODE_FOR_sse_shufps
13261 : CODE_FOR_sse2_shufpd
);
13262 arg0
= TREE_VALUE (arglist
);
13263 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13264 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13265 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13266 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13267 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13268 tmode
= insn_data
[icode
].operand
[0].mode
;
13269 mode0
= insn_data
[icode
].operand
[1].mode
;
13270 mode1
= insn_data
[icode
].operand
[2].mode
;
13271 mode2
= insn_data
[icode
].operand
[3].mode
;
13273 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13274 op0
= copy_to_mode_reg (mode0
, op0
);
13275 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13276 op1
= copy_to_mode_reg (mode1
, op1
);
13277 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13279 /* @@@ better error message */
13280 error ("mask must be an immediate");
13281 return gen_reg_rtx (tmode
);
13284 || GET_MODE (target
) != tmode
13285 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13286 target
= gen_reg_rtx (tmode
);
13287 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13293 case IX86_BUILTIN_PSHUFW
:
13294 case IX86_BUILTIN_PSHUFD
:
13295 case IX86_BUILTIN_PSHUFHW
:
13296 case IX86_BUILTIN_PSHUFLW
:
13297 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13298 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13299 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13300 : CODE_FOR_mmx_pshufw
);
13301 arg0
= TREE_VALUE (arglist
);
13302 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13303 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13304 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13305 tmode
= insn_data
[icode
].operand
[0].mode
;
13306 mode1
= insn_data
[icode
].operand
[1].mode
;
13307 mode2
= insn_data
[icode
].operand
[2].mode
;
13309 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13310 op0
= copy_to_mode_reg (mode1
, op0
);
13311 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13313 /* @@@ better error message */
13314 error ("mask must be an immediate");
13318 || GET_MODE (target
) != tmode
13319 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13320 target
= gen_reg_rtx (tmode
);
13321 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13327 case IX86_BUILTIN_PSLLDQI128
:
13328 case IX86_BUILTIN_PSRLDQI128
:
13329 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
13330 : CODE_FOR_sse2_lshrti3
);
13331 arg0
= TREE_VALUE (arglist
);
13332 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13333 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13334 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13335 tmode
= insn_data
[icode
].operand
[0].mode
;
13336 mode1
= insn_data
[icode
].operand
[1].mode
;
13337 mode2
= insn_data
[icode
].operand
[2].mode
;
13339 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13341 op0
= copy_to_reg (op0
);
13342 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
13344 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13346 error ("shift must be an immediate");
13349 target
= gen_reg_rtx (V2DImode
);
13350 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
13356 case IX86_BUILTIN_FEMMS
:
13357 emit_insn (gen_femms ());
13360 case IX86_BUILTIN_PAVGUSB
:
13361 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
13363 case IX86_BUILTIN_PF2ID
:
13364 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
13366 case IX86_BUILTIN_PFACC
:
13367 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
13369 case IX86_BUILTIN_PFADD
:
13370 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
13372 case IX86_BUILTIN_PFCMPEQ
:
13373 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
13375 case IX86_BUILTIN_PFCMPGE
:
13376 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
13378 case IX86_BUILTIN_PFCMPGT
:
13379 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
13381 case IX86_BUILTIN_PFMAX
:
13382 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
13384 case IX86_BUILTIN_PFMIN
:
13385 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
13387 case IX86_BUILTIN_PFMUL
:
13388 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
13390 case IX86_BUILTIN_PFRCP
:
13391 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
13393 case IX86_BUILTIN_PFRCPIT1
:
13394 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
13396 case IX86_BUILTIN_PFRCPIT2
:
13397 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
13399 case IX86_BUILTIN_PFRSQIT1
:
13400 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
13402 case IX86_BUILTIN_PFRSQRT
:
13403 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
13405 case IX86_BUILTIN_PFSUB
:
13406 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
13408 case IX86_BUILTIN_PFSUBR
:
13409 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
13411 case IX86_BUILTIN_PI2FD
:
13412 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
13414 case IX86_BUILTIN_PMULHRW
:
13415 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
13417 case IX86_BUILTIN_PF2IW
:
13418 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
13420 case IX86_BUILTIN_PFNACC
:
13421 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
13423 case IX86_BUILTIN_PFPNACC
:
13424 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
13426 case IX86_BUILTIN_PI2FW
:
13427 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
13429 case IX86_BUILTIN_PSWAPDSI
:
13430 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
13432 case IX86_BUILTIN_PSWAPDSF
:
13433 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
13435 case IX86_BUILTIN_SSE_ZERO
:
13436 target
= gen_reg_rtx (V4SFmode
);
13437 emit_insn (gen_sse_clrv4sf (target
));
13440 case IX86_BUILTIN_MMX_ZERO
:
13441 target
= gen_reg_rtx (DImode
);
13442 emit_insn (gen_mmx_clrdi (target
));
13445 case IX86_BUILTIN_CLRTI
:
13446 target
= gen_reg_rtx (V2DImode
);
13447 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
13451 case IX86_BUILTIN_SQRTSD
:
13452 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
13453 case IX86_BUILTIN_LOADAPD
:
13454 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
13455 case IX86_BUILTIN_LOADUPD
:
13456 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13458 case IX86_BUILTIN_STOREAPD
:
13459 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13460 case IX86_BUILTIN_STOREUPD
:
13461 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13463 case IX86_BUILTIN_LOADSD
:
13464 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13466 case IX86_BUILTIN_STORESD
:
13467 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
13469 case IX86_BUILTIN_SETPD1
:
13470 target
= assign_386_stack_local (DFmode
, 0);
13471 arg0
= TREE_VALUE (arglist
);
13472 emit_move_insn (adjust_address (target
, DFmode
, 0),
13473 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13474 op0
= gen_reg_rtx (V2DFmode
);
13475 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13476 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, GEN_INT (0)));
13479 case IX86_BUILTIN_SETPD
:
13480 target
= assign_386_stack_local (V2DFmode
, 0);
13481 arg0
= TREE_VALUE (arglist
);
13482 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13483 emit_move_insn (adjust_address (target
, DFmode
, 0),
13484 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13485 emit_move_insn (adjust_address (target
, DFmode
, 8),
13486 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13487 op0
= gen_reg_rtx (V2DFmode
);
13488 emit_insn (gen_sse2_movapd (op0
, target
));
13491 case IX86_BUILTIN_LOADRPD
:
13492 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
13493 gen_reg_rtx (V2DFmode
), 1);
13494 emit_insn (gen_sse2_shufpd (target
, target
, target
, GEN_INT (1)));
13497 case IX86_BUILTIN_LOADPD1
:
13498 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13499 gen_reg_rtx (V2DFmode
), 1);
13500 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13503 case IX86_BUILTIN_STOREPD1
:
13504 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13505 case IX86_BUILTIN_STORERPD
:
13506 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13508 case IX86_BUILTIN_CLRPD
:
13509 target
= gen_reg_rtx (V2DFmode
);
13510 emit_insn (gen_sse_clrv2df (target
));
13513 case IX86_BUILTIN_MFENCE
:
13514 emit_insn (gen_sse2_mfence ());
13516 case IX86_BUILTIN_LFENCE
:
13517 emit_insn (gen_sse2_lfence ());
13520 case IX86_BUILTIN_CLFLUSH
:
13521 arg0
= TREE_VALUE (arglist
);
13522 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13523 icode
= CODE_FOR_sse2_clflush
;
13524 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
13525 op0
= copy_to_mode_reg (Pmode
, op0
);
13527 emit_insn (gen_sse2_clflush (op0
));
13530 case IX86_BUILTIN_MOVNTPD
:
13531 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13532 case IX86_BUILTIN_MOVNTDQ
:
13533 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13534 case IX86_BUILTIN_MOVNTI
:
13535 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13537 case IX86_BUILTIN_LOADDQA
:
13538 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
13539 case IX86_BUILTIN_LOADDQU
:
13540 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
13541 case IX86_BUILTIN_LOADD
:
13542 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
13544 case IX86_BUILTIN_STOREDQA
:
13545 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
13546 case IX86_BUILTIN_STOREDQU
:
13547 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
13548 case IX86_BUILTIN_STORED
:
13549 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
13555 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13556 if (d
->code
== fcode
)
13558 /* Compares are treated specially. */
13559 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13560 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13561 || d
->icode
== CODE_FOR_maskncmpv4sf3
13562 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
13563 || d
->icode
== CODE_FOR_maskcmpv2df3
13564 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13565 || d
->icode
== CODE_FOR_maskncmpv2df3
13566 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13567 return ix86_expand_sse_compare (d
, arglist
, target
);
13569 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
13572 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
13573 if (d
->code
== fcode
)
13574 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
13576 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13577 if (d
->code
== fcode
)
13578 return ix86_expand_sse_comi (d
, arglist
, target
);
13580 /* @@@ Should really do something sensible here. */
13584 /* Store OPERAND to the memory after reload is completed. This means
13585 that we can't easily use assign_stack_local. */
13587 ix86_force_to_memory (mode
, operand
)
13588 enum machine_mode mode
;
13592 if (!reload_completed
)
13594 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
13596 result
= gen_rtx_MEM (mode
,
13597 gen_rtx_PLUS (Pmode
,
13599 GEN_INT (-RED_ZONE_SIZE
)));
13600 emit_move_insn (result
, operand
);
13602 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
13608 operand
= gen_lowpart (DImode
, operand
);
13612 gen_rtx_SET (VOIDmode
,
13613 gen_rtx_MEM (DImode
,
13614 gen_rtx_PRE_DEC (DImode
,
13615 stack_pointer_rtx
)),
13621 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13630 split_di (&operand
, 1, operands
, operands
+ 1);
13632 gen_rtx_SET (VOIDmode
,
13633 gen_rtx_MEM (SImode
,
13634 gen_rtx_PRE_DEC (Pmode
,
13635 stack_pointer_rtx
)),
13638 gen_rtx_SET (VOIDmode
,
13639 gen_rtx_MEM (SImode
,
13640 gen_rtx_PRE_DEC (Pmode
,
13641 stack_pointer_rtx
)),
13646 /* It is better to store HImodes as SImodes. */
13647 if (!TARGET_PARTIAL_REG_STALL
)
13648 operand
= gen_lowpart (SImode
, operand
);
13652 gen_rtx_SET (VOIDmode
,
13653 gen_rtx_MEM (GET_MODE (operand
),
13654 gen_rtx_PRE_DEC (SImode
,
13655 stack_pointer_rtx
)),
13661 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13666 /* Free operand from the memory. */
13668 ix86_free_from_memory (mode
)
13669 enum machine_mode mode
;
13671 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
13675 if (mode
== DImode
|| TARGET_64BIT
)
13677 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
13681 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13682 to pop or add instruction if registers are available. */
13683 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13684 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
13689 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13690 QImode must go into class Q_REGS.
13691 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13692 movdf to do mem-to-mem moves through integer regs. */
13694 ix86_preferred_reload_class (x
, class)
13696 enum reg_class
class;
13698 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
13700 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
13702 /* SSE can't load any constant directly yet. */
13703 if (SSE_CLASS_P (class))
13705 /* Floats can load 0 and 1. */
13706 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
13708 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13709 if (MAYBE_SSE_CLASS_P (class))
13710 return (reg_class_subset_p (class, GENERAL_REGS
)
13711 ? GENERAL_REGS
: FLOAT_REGS
);
13715 /* General regs can load everything. */
13716 if (reg_class_subset_p (class, GENERAL_REGS
))
13717 return GENERAL_REGS
;
13718 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13719 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13722 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
13724 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
13729 /* If we are copying between general and FP registers, we need a memory
13730 location. The same is true for SSE and MMX registers.
13732 The macro can't work reliably when one of the CLASSES is class containing
13733 registers from multiple units (SSE, MMX, integer). We avoid this by never
13734 combining those units in single alternative in the machine description.
13735 Ensure that this constraint holds to avoid unexpected surprises.
13737 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13738 enforce these sanity checks. */
13740 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
13741 enum reg_class class1
, class2
;
13742 enum machine_mode mode
;
13745 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
13746 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
13747 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
13748 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
13749 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
13750 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
13757 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
13758 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
13759 && (mode
) != SImode
)
13760 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13761 && (mode
) != SImode
));
13763 /* Return the cost of moving data from a register in class CLASS1 to
13764 one in class CLASS2.
13766 It is not required that the cost always equal 2 when FROM is the same as TO;
13767 on some machines it is expensive to move between registers if they are not
13768 general registers. */
13770 ix86_register_move_cost (mode
, class1
, class2
)
13771 enum machine_mode mode
;
13772 enum reg_class class1
, class2
;
13774 /* In case we require secondary memory, compute cost of the store followed
13775 by load. In order to avoid bad register allocation choices, we need
13776 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13778 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
13782 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
13783 MEMORY_MOVE_COST (mode
, class1
, 1));
13784 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
13785 MEMORY_MOVE_COST (mode
, class2
, 1));
13787 /* In case of copying from general_purpose_register we may emit multiple
13788 stores followed by single load causing memory size mismatch stall.
13789 Count this as arbitrarily high cost of 20. */
13790 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
13793 /* In the case of FP/MMX moves, the registers actually overlap, and we
13794 have to switch modes in order to treat them differently. */
13795 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
13796 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
13802 /* Moves between SSE/MMX and integer unit are expensive. */
13803 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13804 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
13805 return ix86_cost
->mmxsse_to_integer
;
13806 if (MAYBE_FLOAT_CLASS_P (class1
))
13807 return ix86_cost
->fp_move
;
13808 if (MAYBE_SSE_CLASS_P (class1
))
13809 return ix86_cost
->sse_move
;
13810 if (MAYBE_MMX_CLASS_P (class1
))
13811 return ix86_cost
->mmx_move
;
13815 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13817 ix86_hard_regno_mode_ok (regno
, mode
)
13819 enum machine_mode mode
;
13821 /* Flags and only flags can only hold CCmode values. */
13822 if (CC_REGNO_P (regno
))
13823 return GET_MODE_CLASS (mode
) == MODE_CC
;
13824 if (GET_MODE_CLASS (mode
) == MODE_CC
13825 || GET_MODE_CLASS (mode
) == MODE_RANDOM
13826 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
13828 if (FP_REGNO_P (regno
))
13829 return VALID_FP_MODE_P (mode
);
13830 if (SSE_REGNO_P (regno
))
13831 return VALID_SSE_REG_MODE (mode
);
13832 if (MMX_REGNO_P (regno
))
13833 return VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
);
13834 /* We handle both integer and floats in the general purpose registers.
13835 In future we should be able to handle vector modes as well. */
13836 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
13838 /* Take care for QImode values - they can be in non-QI regs, but then
13839 they do cause partial register stalls. */
13840 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
13842 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
13845 /* Return the cost of moving data of mode M between a
13846 register and memory. A value of 2 is the default; this cost is
13847 relative to those in `REGISTER_MOVE_COST'.
13849 If moving between registers and memory is more expensive than
13850 between two registers, you should define this macro to express the
13853 Model also increased moving costs of QImode registers in non
13857 ix86_memory_move_cost (mode
, class, in
)
13858 enum machine_mode mode
;
13859 enum reg_class
class;
13862 if (FLOAT_CLASS_P (class))
13880 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
13882 if (SSE_CLASS_P (class))
13885 switch (GET_MODE_SIZE (mode
))
13899 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
13901 if (MMX_CLASS_P (class))
13904 switch (GET_MODE_SIZE (mode
))
13915 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
13917 switch (GET_MODE_SIZE (mode
))
13921 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
13922 : ix86_cost
->movzbl_load
);
13924 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
13925 : ix86_cost
->int_store
[0] + 4);
13928 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
13930 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13931 if (mode
== TFmode
)
13933 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
13934 * ((int) GET_MODE_SIZE (mode
)
13935 + UNITS_PER_WORD
-1 ) / UNITS_PER_WORD
);
13939 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13941 ix86_svr3_asm_out_constructor (symbol
, priority
)
13943 int priority ATTRIBUTE_UNUSED
;
13946 fputs ("\tpushl $", asm_out_file
);
13947 assemble_name (asm_out_file
, XSTR (symbol
, 0));
13948 fputc ('\n', asm_out_file
);
#if TARGET_MACHO
/* Counter used to generate unique labels for each symbol stub.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub: the indirect-jump stub itself, the binder
   entry that calls dyld_stub_binding_helper, and the lazy pointer.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC stub: materialize the pic base in %eax, then jump through
	 the lazy pointer relative to it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* The lazy pointer initially points at the binder so that the first
     call resolves the symbol; dyld then rewrites it.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14017 /* Order the registers for register allocator. */
14020 x86_order_regs_for_local_alloc ()
14025 /* First allocate the local general purpose registers. */
14026 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14027 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
14028 reg_alloc_order
[pos
++] = i
;
14030 /* Global general purpose registers. */
14031 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14032 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
14033 reg_alloc_order
[pos
++] = i
;
14035 /* x87 registers come first in case we are doing FP math
14037 if (!TARGET_SSE_MATH
)
14038 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14039 reg_alloc_order
[pos
++] = i
;
14041 /* SSE registers. */
14042 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
14043 reg_alloc_order
[pos
++] = i
;
14044 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
14045 reg_alloc_order
[pos
++] = i
;
14047 /* x87 registerts. */
14048 if (TARGET_SSE_MATH
)
14049 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14050 reg_alloc_order
[pos
++] = i
;
14052 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
14053 reg_alloc_order
[pos
++] = i
;
14055 /* Initialize the rest of array as we do not allocate some registers
14057 while (pos
< FIRST_PSEUDO_REGISTER
)
14058 reg_alloc_order
[pos
++] = 0;
14061 /* Returns an expression indicating where the this parameter is
14062 located on entry to the FUNCTION. */
14065 x86_this_parameter (function
)
14068 tree type
= TREE_TYPE (function
);
14072 int n
= aggregate_value_p (TREE_TYPE (type
)) != 0;
14073 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
14076 if (ix86_fntype_regparm (type
) > 0)
14080 parm
= TYPE_ARG_TYPES (type
);
14081 /* Figure out whether or not the function has a variable number of
14083 for (; parm
; parm
= TREE_CHAIN (parm
))
14084 if (TREE_VALUE (parm
) == void_type_node
)
14086 /* If not, the this parameter is in %eax. */
14088 return gen_rtx_REG (SImode
, 0);
14091 if (aggregate_value_p (TREE_TYPE (type
)))
14092 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
14094 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
14097 /* Determine whether x86_output_mi_thunk can succeed. */
14100 x86_can_output_mi_thunk (thunk
, delta
, vcall_offset
, function
)
14101 tree thunk ATTRIBUTE_UNUSED
;
14102 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
;
14103 HOST_WIDE_INT vcall_offset
;
14106 /* 64-bit can handle anything. */
14110 /* For 32-bit, everything's fine if we have one free register. */
14111 if (ix86_fntype_regparm (TREE_TYPE (function
)) < 3)
14114 /* Need a free register for vcall_offset. */
14118 /* Need a free register for GOT references. */
14119 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
14122 /* Otherwise ok. */
14126 /* Output the assembler code for a thunk function. THUNK_DECL is the
14127 declaration for the thunk function itself, FUNCTION is the decl for
14128 the target function. DELTA is an immediate constant offset to be
14129 added to THIS. If VCALL_OFFSET is non-zero, the word at
14130 *(*this + vcall_offset) should be added to THIS. */
14133 x86_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
)
14134 FILE *file ATTRIBUTE_UNUSED
;
14135 tree thunk ATTRIBUTE_UNUSED
;
14136 HOST_WIDE_INT delta
;
14137 HOST_WIDE_INT vcall_offset
;
14141 rtx
this = x86_this_parameter (function
);
14144 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14145 pull it in now and let DELTA benefit. */
14148 else if (vcall_offset
)
14150 /* Put the this parameter into %eax. */
14152 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
14153 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14156 this_reg
= NULL_RTX
;
14158 /* Adjust the this parameter by a fixed constant. */
14161 xops
[0] = GEN_INT (delta
);
14162 xops
[1] = this_reg
? this_reg
: this;
14165 if (!x86_64_general_operand (xops
[0], DImode
))
14167 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14169 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
14173 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14176 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14179 /* Adjust the this parameter by a value stored in the vtable. */
14183 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14185 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14187 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
14190 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14192 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14194 /* Adjust the this parameter. */
14195 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
14196 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
14198 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
14199 xops
[0] = GEN_INT (vcall_offset
);
14201 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14202 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
14204 xops
[1] = this_reg
;
14206 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14208 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14211 /* If necessary, drop THIS back to its stack slot. */
14212 if (this_reg
&& this_reg
!= this)
14214 xops
[0] = this_reg
;
14216 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14219 xops
[0] = DECL_RTL (function
);
14222 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14223 output_asm_insn ("jmp\t%P0", xops
);
14226 tmp
= XEXP (xops
[0], 0);
14227 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, tmp
), UNSPEC_GOTPCREL
);
14228 tmp
= gen_rtx_CONST (Pmode
, tmp
);
14229 tmp
= gen_rtx_MEM (QImode
, tmp
);
14231 output_asm_insn ("jmp\t%A0", xops
);
14236 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14237 output_asm_insn ("jmp\t%P0", xops
);
14240 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14241 output_set_got (tmp
);
14244 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
14245 output_asm_insn ("jmp\t{*}%1", xops
);
14251 x86_field_alignment (field
, computed
)
14255 enum machine_mode mode
;
14256 tree type
= TREE_TYPE (field
);
14258 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
14260 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
14261 ? get_inner_array_type (type
) : type
);
14262 if (mode
== DFmode
|| mode
== DCmode
14263 || GET_MODE_CLASS (mode
) == MODE_INT
14264 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
14265 return MIN (32, computed
);
14269 /* Output assembler code to FILE to increment profiler label # LABELNO
14270 for profiling a function entry. */
14272 x86_function_profiler (file
, labelno
)
14279 #ifndef NO_PROFILE_COUNTERS
14280 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
14282 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
14286 #ifndef NO_PROFILE_COUNTERS
14287 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
14289 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14293 #ifndef NO_PROFILE_COUNTERS
14294 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14295 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
14297 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
14301 #ifndef NO_PROFILE_COUNTERS
14302 fprintf (file
, "\tmovl\t$%sP%d,%%$s\n", LPREFIX
, labelno
,
14303 PROFILE_COUNT_REGISTER
);
14305 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14309 /* Implement machine specific optimizations.
14310 At the moment we implement single transformation: AMD Athlon works faster
14311 when RET is not destination of conditional jump or directly preceeded
14312 by other jump instruction. We avoid the penalty by inserting NOP just
14313 before the RET instructions in such cases. */
14315 x86_machine_dependent_reorg (first
)
14316 rtx first ATTRIBUTE_UNUSED
;
14320 if (!TARGET_ATHLON
|| !optimize
|| optimize_size
)
14322 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
14324 basic_block bb
= e
->src
;
14327 bool insert
= false;
14329 if (!returnjump_p (ret
) || !maybe_hot_bb_p (bb
))
14331 prev
= prev_nonnote_insn (ret
);
14332 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
14335 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
14336 if (EDGE_FREQUENCY (e
) && e
->src
->index
> 0
14337 && !(e
->flags
& EDGE_FALLTHRU
))
14342 prev
= prev_real_insn (ret
);
14343 if (prev
&& GET_CODE (prev
) == JUMP_INSN
14344 && any_condjump_p (prev
))
14348 emit_insn_before (gen_nop (), ret
);
14352 #include "gt-i386.h"