1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = {	/* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73      {2, 2, 2},				/* cost of storing fp registers
						   in SFmode, DFmode and XFmode */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
93 };
94
95 /* Processor costs (relative to an add) */
96 static const
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
108 3, /* MOVE_RATIO */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117      {8, 8, 8},				/* cost of storing fp registers
						   in SFmode, DFmode and XFmode */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
137 };
138
139 static const
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160      {8, 8, 8},				/* cost of storing fp registers
						   in SFmode, DFmode and XFmode */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
180 };
181
182 static const
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
194 6, /* MOVE_RATIO */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203      {4, 4, 6},				/* cost of storing fp registers
						   in SFmode, DFmode and XFmode */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
223 };
224
225 static const
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
237 6, /* MOVE_RATIO */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246      {4, 4, 6},				/* cost of storing fp registers
						   in SFmode, DFmode and XFmode */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
266 };
267
268 static const
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
280 4, /* MOVE_RATIO */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289      {4, 4, 4},				/* cost of storing fp registers
						   in SFmode, DFmode and XFmode */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
309 };
310
311 static const
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
323 9, /* MOVE_RATIO */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332      {6, 6, 8},				/* cost of storing fp registers
						   in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
352 };
353
354 static const
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375      {4, 4, 6},				/* cost of storing fp registers
						   in SFmode, DFmode and XFmode */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
395 };
396
397 const struct processor_costs *ix86_cost = &pentium_cost;
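/* Illustrative sketch (not part of the original file): the currently
   selected cost table is consulted through the ix86_cost pointer when
   weighing alternative instruction sequences.  The field names below are
   assumed from the comments above, so treat this only as an example.  */
#if 0
static int
example_prefer_lea_p (void)
{
  /* Prefer lea over add whenever the tuning target says it is no more
     expensive (all costs are relative to an add).  */
  return ix86_cost->lea <= ix86_cost->add;
}
#endif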
398
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
407
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
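/* Illustrative sketch (an assumption, not from this file): the per-CPU
   tuning masks above are normally tested against the bit for the CPU we
   are tuning for, along the lines of the hypothetical macros below; the
   real TARGET_* definitions live in i386.h.  */
#if 0
#define EXAMPLE_CPUMASK (1 << ix86_cpu)
#define EXAMPLE_TARGET_USE_LEAVE (x86_use_leave & EXAMPLE_CPUMASK)
#define EXAMPLE_TARGET_PUSH_MEMORY (x86_push_memory & EXAMPLE_CPUMASK)
#endif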
448
449 /* If the average insn count for a single function invocation is
450    lower than this constant, emit fast (but longer) prologue and
451    epilogue code.  */
452 #define FAST_PROLOGUE_INSN_COUNT 30
453
454 /* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456 static int use_fast_prologue_epilogue;
457
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
462
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
465
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
467 {
468 /* ax, dx, cx, bx */
469 AREG, DREG, CREG, BREG,
470 /* si, di, bp, sp */
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
475 /* arg pointer */
476 NON_Q_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
487 };
488
489 /* The "default" register map used in 32bit mode. */
490
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
492 {
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
500 };
501
502 static int const x86_64_int_parameter_registers[6] =
503 {
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506 };
507
508 static int const x86_64_int_return_registers[4] =
509 {
510   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
511 };
512
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515 {
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523 };
524
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578 */
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
580 {
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
587   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
588 };
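/* Illustrative sketch (an assumption, not from this file): debug-info
   output remaps gcc register numbers through one of the tables above,
   roughly as the hypothetical macro below does; the real hook is defined
   in the target headers.  */
#if 0
#define EXAMPLE_DBX_REGISTER_NUMBER(n) \
  (TARGET_64BIT ? dbx64_register_map[(n)] : dbx_register_map[(n)])
#endif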
589
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
595
596 /* The encoding characters for the four TLS models present in ELF. */
597
598 static char const tls_model_chars[] = " GLil";
599
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
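/* Worked example (assuming the usual x86-64 values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8): the va_arg register
   save area is 6 * 8 + 8 * 16 = 176 bytes.  */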
603
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
606 {
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
611 };
612
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
615
616 /* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635 struct ix86_frame
636 {
637 int nregs;
638 int padding1;
639 int va_arg_size;
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
643 int red_zone_size;
644
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650 };
651
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
656 /* Parsed value. */
657 enum cmodel ix86_cmodel;
658 /* Asm dialect. */
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
661 /* TLS dialect.  */
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
664
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
667
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
672
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
677
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
680
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse;
683
684 /* ix86_regparm_string as a number */
685 int ix86_regparm;
686
687 /* Alignment to use for loops and jumps: */
688
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
691
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
694
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
697
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
700
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
704
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
707
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
711 \f
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
716 int, int, FILE *));
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
753 static rtx x86_this_parameter PARAMS ((tree));
754 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
755 HOST_WIDE_INT, tree));
756 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
757 HOST_WIDE_INT, tree));
758
759 struct ix86_address
760 {
761 rtx base, index, disp;
762 HOST_WIDE_INT scale;
763 };
764
765 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
766 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
767
768 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
769 static const char *ix86_strip_name_encoding PARAMS ((const char *))
770 ATTRIBUTE_UNUSED;
771
772 struct builtin_description;
773 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
774 tree, rtx));
775 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
776 tree, rtx));
777 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
778 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
779 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
780 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
781 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
782 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
783 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
784 enum rtx_code *,
785 enum rtx_code *,
786 enum rtx_code *));
787 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
788 rtx *, rtx *));
789 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
790 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
791 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
792 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
793 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
794 static int ix86_save_reg PARAMS ((unsigned int, int));
795 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
796 static int ix86_comp_type_attributes PARAMS ((tree, tree));
797 static int ix86_fntype_regparm PARAMS ((tree));
798 const struct attribute_spec ix86_attribute_table[];
799 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
800 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
801 static int ix86_value_regno PARAMS ((enum machine_mode));
802
803 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
804 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
805 #endif
806
807 /* Register class used for passing a given 64-bit part of the argument.
808    These represent classes as documented by the PS ABI, with the exception
809    of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
810    uses an SFmode or DFmode move instead of a DImode one to avoid reformatting penalties.
811
812    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
813    whenever possible (the upper half then contains padding).
814  */
815 enum x86_64_reg_class
816 {
817 X86_64_NO_CLASS,
818 X86_64_INTEGER_CLASS,
819 X86_64_INTEGERSI_CLASS,
820 X86_64_SSE_CLASS,
821 X86_64_SSESF_CLASS,
822 X86_64_SSEDF_CLASS,
823 X86_64_SSEUP_CLASS,
824 X86_64_X87_CLASS,
825 X86_64_X87UP_CLASS,
826 X86_64_MEMORY_CLASS
827 };
828 static const char * const x86_64_reg_class_name[] =
829 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
830
831 #define MAX_CLASSES 4
832 static int classify_argument PARAMS ((enum machine_mode, tree,
833 enum x86_64_reg_class [MAX_CLASSES],
834 int));
835 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
836 int *));
837 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
838 const int *, int));
839 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
840 enum x86_64_reg_class));
841 \f
842 /* Initialize the GCC target structure. */
843 #undef TARGET_ATTRIBUTE_TABLE
844 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
845 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
846 # undef TARGET_MERGE_DECL_ATTRIBUTES
847 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
848 #endif
849
850 #undef TARGET_COMP_TYPE_ATTRIBUTES
851 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
852
853 #undef TARGET_INIT_BUILTINS
854 #define TARGET_INIT_BUILTINS ix86_init_builtins
855
856 #undef TARGET_EXPAND_BUILTIN
857 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
858
859 #undef TARGET_ASM_FUNCTION_EPILOGUE
860 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
861
862 #undef TARGET_ASM_OPEN_PAREN
863 #define TARGET_ASM_OPEN_PAREN ""
864 #undef TARGET_ASM_CLOSE_PAREN
865 #define TARGET_ASM_CLOSE_PAREN ""
866
867 #undef TARGET_ASM_ALIGNED_HI_OP
868 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
869 #undef TARGET_ASM_ALIGNED_SI_OP
870 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
871 #ifdef ASM_QUAD
872 #undef TARGET_ASM_ALIGNED_DI_OP
873 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
874 #endif
875
876 #undef TARGET_ASM_UNALIGNED_HI_OP
877 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
878 #undef TARGET_ASM_UNALIGNED_SI_OP
879 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
880 #undef TARGET_ASM_UNALIGNED_DI_OP
881 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
882
883 #undef TARGET_SCHED_ADJUST_COST
884 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
885 #undef TARGET_SCHED_ISSUE_RATE
886 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
887 #undef TARGET_SCHED_VARIABLE_ISSUE
888 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
889 #undef TARGET_SCHED_INIT
890 #define TARGET_SCHED_INIT ix86_sched_init
891 #undef TARGET_SCHED_REORDER
892 #define TARGET_SCHED_REORDER ix86_sched_reorder
893 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
894 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
895 ia32_use_dfa_pipeline_interface
896 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
897 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
898 ia32_multipass_dfa_lookahead
899
900 #ifdef HAVE_AS_TLS
901 #undef TARGET_HAVE_TLS
902 #define TARGET_HAVE_TLS true
903 #endif
904 #undef TARGET_CANNOT_FORCE_CONST_MEM
905 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
906
907 #undef TARGET_ASM_OUTPUT_MI_THUNK
908 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
909 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
910 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
911
912 struct gcc_target targetm = TARGET_INITIALIZER;
913 \f
914 /* Sometimes certain combinations of command options do not make
915 sense on a particular target machine. You can define a macro
916 `OVERRIDE_OPTIONS' to take account of this. This macro, if
917 defined, is executed once just after all the command options have
918 been parsed.
919
920 Don't use this macro to turn on various extra optimizations for
921 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
922
923 void
924 override_options ()
925 {
926 int i;
927 /* Comes from final.c -- no real reason to change it. */
928 #define MAX_CODE_ALIGN 16
929
930 static struct ptt
931 {
932 const struct processor_costs *cost; /* Processor costs */
933 const int target_enable; /* Target flags to enable. */
934 const int target_disable; /* Target flags to disable. */
935 const int align_loop; /* Default alignments. */
936 const int align_loop_max_skip;
937 const int align_jump;
938 const int align_jump_max_skip;
939 const int align_func;
940 const int branch_cost;
941 }
942 const processor_target_table[PROCESSOR_max] =
943 {
944 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
945 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
946 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
947 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
948 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
949 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
950 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
951 };
952
953 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
954 static struct pta
955 {
956 const char *const name; /* processor name or nickname. */
957 const enum processor_type processor;
958 const enum pta_flags
959 {
960 PTA_SSE = 1,
961 PTA_SSE2 = 2,
962 PTA_MMX = 4,
963 PTA_PREFETCH_SSE = 8,
964 PTA_3DNOW = 16,
965 PTA_3DNOW_A = 64
966 } flags;
967 }
968 const processor_alias_table[] =
969 {
970 {"i386", PROCESSOR_I386, 0},
971 {"i486", PROCESSOR_I486, 0},
972 {"i586", PROCESSOR_PENTIUM, 0},
973 {"pentium", PROCESSOR_PENTIUM, 0},
974 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
975 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
976 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
977 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
978 {"i686", PROCESSOR_PENTIUMPRO, 0},
979 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
980 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
981 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
982 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
983 PTA_MMX | PTA_PREFETCH_SSE},
984 {"k6", PROCESSOR_K6, PTA_MMX},
985 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
986 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
987 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
988 | PTA_3DNOW_A},
989 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
990 | PTA_3DNOW | PTA_3DNOW_A},
991 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
992 | PTA_3DNOW_A | PTA_SSE},
993 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
994 | PTA_3DNOW_A | PTA_SSE},
995 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
996 | PTA_3DNOW_A | PTA_SSE},
997 };
998
999 int const pta_size = ARRAY_SIZE (processor_alias_table);
1000
1001   /* By default our XFmode is the 80-bit extended format.  If we use
1002      TFmode instead, it's also the 80-bit format, but with padding.  */
1003 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1004 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1005
1006 /* Set the default values for switches whose default depends on TARGET_64BIT
1007      in case they weren't overwritten by command line options.  */
1008 if (TARGET_64BIT)
1009 {
1010 if (flag_omit_frame_pointer == 2)
1011 flag_omit_frame_pointer = 1;
1012 if (flag_asynchronous_unwind_tables == 2)
1013 flag_asynchronous_unwind_tables = 1;
1014 if (flag_pcc_struct_return == 2)
1015 flag_pcc_struct_return = 0;
1016 }
1017 else
1018 {
1019 if (flag_omit_frame_pointer == 2)
1020 flag_omit_frame_pointer = 0;
1021 if (flag_asynchronous_unwind_tables == 2)
1022 flag_asynchronous_unwind_tables = 0;
1023 if (flag_pcc_struct_return == 2)
1024 flag_pcc_struct_return = 1;
1025 }
1026
1027 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1028 SUBTARGET_OVERRIDE_OPTIONS;
1029 #endif
1030
1031 if (!ix86_cpu_string && ix86_arch_string)
1032 ix86_cpu_string = ix86_arch_string;
1033 if (!ix86_cpu_string)
1034 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1035 if (!ix86_arch_string)
1036 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1037
1038 if (ix86_cmodel_string != 0)
1039 {
1040 if (!strcmp (ix86_cmodel_string, "small"))
1041 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1042 else if (flag_pic)
1043 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1044 else if (!strcmp (ix86_cmodel_string, "32"))
1045 ix86_cmodel = CM_32;
1046 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1047 ix86_cmodel = CM_KERNEL;
1048 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1049 ix86_cmodel = CM_MEDIUM;
1050 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1051 ix86_cmodel = CM_LARGE;
1052 else
1053 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1054 }
1055 else
1056 {
1057 ix86_cmodel = CM_32;
1058 if (TARGET_64BIT)
1059 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1060 }
1061 if (ix86_asm_string != 0)
1062 {
1063 if (!strcmp (ix86_asm_string, "intel"))
1064 ix86_asm_dialect = ASM_INTEL;
1065 else if (!strcmp (ix86_asm_string, "att"))
1066 ix86_asm_dialect = ASM_ATT;
1067 else
1068 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1069 }
1070 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1071 error ("code model `%s' not supported in the %s bit mode",
1072 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1073 if (ix86_cmodel == CM_LARGE)
1074 sorry ("code model `large' not supported yet");
1075 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1076 sorry ("%i-bit mode not compiled in",
1077 (target_flags & MASK_64BIT) ? 64 : 32);
1078
1079 for (i = 0; i < pta_size; i++)
1080 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1081 {
1082 ix86_arch = processor_alias_table[i].processor;
1083 /* Default cpu tuning to the architecture. */
1084 ix86_cpu = ix86_arch;
1085 if (processor_alias_table[i].flags & PTA_MMX
1086 && !(target_flags_explicit & MASK_MMX))
1087 target_flags |= MASK_MMX;
1088 if (processor_alias_table[i].flags & PTA_3DNOW
1089 && !(target_flags_explicit & MASK_3DNOW))
1090 target_flags |= MASK_3DNOW;
1091 if (processor_alias_table[i].flags & PTA_3DNOW_A
1092 && !(target_flags_explicit & MASK_3DNOW_A))
1093 target_flags |= MASK_3DNOW_A;
1094 if (processor_alias_table[i].flags & PTA_SSE
1095 && !(target_flags_explicit & MASK_SSE))
1096 target_flags |= MASK_SSE;
1097 if (processor_alias_table[i].flags & PTA_SSE2
1098 && !(target_flags_explicit & MASK_SSE2))
1099 target_flags |= MASK_SSE2;
1100 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1101 x86_prefetch_sse = true;
1102 break;
1103 }
1104
1105 if (i == pta_size)
1106 error ("bad value (%s) for -march= switch", ix86_arch_string);
1107
1108 for (i = 0; i < pta_size; i++)
1109 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1110 {
1111 ix86_cpu = processor_alias_table[i].processor;
1112 break;
1113 }
1114 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1115 x86_prefetch_sse = true;
1116 if (i == pta_size)
1117 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1118
1119 if (optimize_size)
1120 ix86_cost = &size_cost;
1121 else
1122 ix86_cost = processor_target_table[ix86_cpu].cost;
1123 target_flags |= processor_target_table[ix86_cpu].target_enable;
1124 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1125
1126 /* Arrange to set up i386_stack_locals for all functions. */
1127 init_machine_status = ix86_init_machine_status;
1128
1129 /* Validate -mregparm= value. */
1130 if (ix86_regparm_string)
1131 {
1132 i = atoi (ix86_regparm_string);
1133 if (i < 0 || i > REGPARM_MAX)
1134 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1135 else
1136 ix86_regparm = i;
1137 }
1138 else
1139 if (TARGET_64BIT)
1140 ix86_regparm = REGPARM_MAX;
1141
1142 /* If the user has provided any of the -malign-* options,
1143 warn and use that value only if -falign-* is not set.
1144 Remove this code in GCC 3.2 or later. */
1145 if (ix86_align_loops_string)
1146 {
1147 warning ("-malign-loops is obsolete, use -falign-loops");
1148 if (align_loops == 0)
1149 {
1150 i = atoi (ix86_align_loops_string);
1151 if (i < 0 || i > MAX_CODE_ALIGN)
1152 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1153 else
1154 align_loops = 1 << i;
1155 }
1156 }
1157
1158 if (ix86_align_jumps_string)
1159 {
1160 warning ("-malign-jumps is obsolete, use -falign-jumps");
1161 if (align_jumps == 0)
1162 {
1163 i = atoi (ix86_align_jumps_string);
1164 if (i < 0 || i > MAX_CODE_ALIGN)
1165 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1166 else
1167 align_jumps = 1 << i;
1168 }
1169 }
1170
1171 if (ix86_align_funcs_string)
1172 {
1173 warning ("-malign-functions is obsolete, use -falign-functions");
1174 if (align_functions == 0)
1175 {
1176 i = atoi (ix86_align_funcs_string);
1177 if (i < 0 || i > MAX_CODE_ALIGN)
1178 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1179 else
1180 align_functions = 1 << i;
1181 }
1182 }
1183
1184 /* Default align_* from the processor table. */
1185 if (align_loops == 0)
1186 {
1187 align_loops = processor_target_table[ix86_cpu].align_loop;
1188 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1189 }
1190 if (align_jumps == 0)
1191 {
1192 align_jumps = processor_target_table[ix86_cpu].align_jump;
1193 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1194 }
1195 if (align_functions == 0)
1196 {
1197 align_functions = processor_target_table[ix86_cpu].align_func;
1198 }
1199
1200 /* Validate -mpreferred-stack-boundary= value, or provide default.
1201 The default of 128 bits is for Pentium III's SSE __m128, but we
1202 don't want additional code to keep the stack aligned when
1203 optimizing for code size. */
1204 ix86_preferred_stack_boundary = (optimize_size
1205 ? TARGET_64BIT ? 128 : 32
1206 : 128);
1207 if (ix86_preferred_stack_boundary_string)
1208 {
1209 i = atoi (ix86_preferred_stack_boundary_string);
1210 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1211 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1212 TARGET_64BIT ? 4 : 2);
1213 else
1214 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1215 }
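  /* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment wanted for SSE __m128 spills.  */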
1216
1217 /* Validate -mbranch-cost= value, or provide default. */
1218 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1219 if (ix86_branch_cost_string)
1220 {
1221 i = atoi (ix86_branch_cost_string);
1222 if (i < 0 || i > 5)
1223 error ("-mbranch-cost=%d is not between 0 and 5", i);
1224 else
1225 ix86_branch_cost = i;
1226 }
1227
1228 if (ix86_tls_dialect_string)
1229 {
1230 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1231 ix86_tls_dialect = TLS_DIALECT_GNU;
1232 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1233 ix86_tls_dialect = TLS_DIALECT_SUN;
1234 else
1235 error ("bad value (%s) for -mtls-dialect= switch",
1236 ix86_tls_dialect_string);
1237 }
1238
1239 /* Keep nonleaf frame pointers. */
1240 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1241 flag_omit_frame_pointer = 1;
1242
1243 /* If we're doing fast math, we don't care about comparison order
1244 wrt NaNs. This lets us use a shorter comparison sequence. */
1245 if (flag_unsafe_math_optimizations)
1246 target_flags &= ~MASK_IEEE_FP;
1247
1248 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1249 since the insns won't need emulation. */
1250 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1251 target_flags &= ~MASK_NO_FANCY_MATH_387;
1252
1253 if (TARGET_64BIT)
1254 {
1255 if (TARGET_ALIGN_DOUBLE)
1256 error ("-malign-double makes no sense in the 64bit mode");
1257 if (TARGET_RTD)
1258 error ("-mrtd calling convention not supported in the 64bit mode");
1259 /* Enable by default the SSE and MMX builtins. */
1260 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1261 ix86_fpmath = FPMATH_SSE;
1262 }
1263 else
1264 ix86_fpmath = FPMATH_387;
1265
1266 if (ix86_fpmath_string != 0)
1267 {
1268 if (! strcmp (ix86_fpmath_string, "387"))
1269 ix86_fpmath = FPMATH_387;
1270 else if (! strcmp (ix86_fpmath_string, "sse"))
1271 {
1272 if (!TARGET_SSE)
1273 {
1274 warning ("SSE instruction set disabled, using 387 arithmetics");
1275 ix86_fpmath = FPMATH_387;
1276 }
1277 else
1278 ix86_fpmath = FPMATH_SSE;
1279 }
1280 else if (! strcmp (ix86_fpmath_string, "387,sse")
1281 || ! strcmp (ix86_fpmath_string, "sse,387"))
1282 {
1283 if (!TARGET_SSE)
1284 {
1285 warning ("SSE instruction set disabled, using 387 arithmetics");
1286 ix86_fpmath = FPMATH_387;
1287 }
1288 else if (!TARGET_80387)
1289 {
1290 warning ("387 instruction set disabled, using SSE arithmetics");
1291 ix86_fpmath = FPMATH_SSE;
1292 }
1293 else
1294 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1295 }
1296 else
1297 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1298 }
1299
1300 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1301 on by -msse. */
1302 if (TARGET_SSE)
1303 {
1304 target_flags |= MASK_MMX;
1305 x86_prefetch_sse = true;
1306 }
1307
1308 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1309 if (TARGET_3DNOW)
1310 {
1311 target_flags |= MASK_MMX;
1312       /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1313 extensions it adds. */
1314 if (x86_3dnow_a & (1 << ix86_arch))
1315 target_flags |= MASK_3DNOW_A;
1316 }
1317 if ((x86_accumulate_outgoing_args & CPUMASK)
1318 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1319 && !optimize_size)
1320 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1321
1322 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1323 {
1324 char *p;
1325 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1326 p = strchr (internal_label_prefix, 'X');
1327 internal_label_prefix_len = p - internal_label_prefix;
1328 *p = '\0';
1329 }
1330 }
1331 \f
1332 void
1333 optimization_options (level, size)
1334 int level;
1335 int size ATTRIBUTE_UNUSED;
1336 {
1337 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1338 make the problem with not enough registers even worse. */
1339 #ifdef INSN_SCHEDULING
1340 if (level > 1)
1341 flag_schedule_insns = 0;
1342 #endif
1343
1344   /* The default values of these switches depend on TARGET_64BIT,
1345      which is not known at this moment.  Mark these values with 2 and
1346      let the user override them.  If there is no command line option
1347      specifying them, we will set the defaults in override_options.  */
1348 if (optimize >= 1)
1349 flag_omit_frame_pointer = 2;
1350 flag_pcc_struct_return = 2;
1351 flag_asynchronous_unwind_tables = 2;
1352 }
1353 \f
1354 /* Table of valid machine attributes. */
1355 const struct attribute_spec ix86_attribute_table[] =
1356 {
1357 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1358 /* Stdcall attribute says callee is responsible for popping arguments
1359 if they are not variable. */
1360 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1361 /* Cdecl attribute says the callee is a normal C declaration */
1362 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1363 /* Regparm attribute specifies how many integer arguments are to be
1364 passed in registers. */
1365 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1366 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1367 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1368 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1369 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1370 #endif
1371 { NULL, 0, 0, false, false, false, NULL }
1372 };
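/* Usage sketch (illustrative, not part of this file): the calling
   convention attributes registered above are applied to function types
   in user code, for example:  */
#if 0
int __attribute__ ((stdcall)) example_stdcall (int a, int b);
int __attribute__ ((cdecl)) example_cdecl (int a, int b);
int __attribute__ ((regparm (3))) example_regparm (int a, int b, int c);
#endif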
1373
1374 /* Handle a "cdecl" or "stdcall" attribute;
1375 arguments as in struct attribute_spec.handler. */
1376 static tree
1377 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1378 tree *node;
1379 tree name;
1380 tree args ATTRIBUTE_UNUSED;
1381 int flags ATTRIBUTE_UNUSED;
1382 bool *no_add_attrs;
1383 {
1384 if (TREE_CODE (*node) != FUNCTION_TYPE
1385 && TREE_CODE (*node) != METHOD_TYPE
1386 && TREE_CODE (*node) != FIELD_DECL
1387 && TREE_CODE (*node) != TYPE_DECL)
1388 {
1389 warning ("`%s' attribute only applies to functions",
1390 IDENTIFIER_POINTER (name));
1391 *no_add_attrs = true;
1392 }
1393
1394 if (TARGET_64BIT)
1395 {
1396 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1397 *no_add_attrs = true;
1398 }
1399
1400 return NULL_TREE;
1401 }
1402
1403 /* Handle a "regparm" attribute;
1404 arguments as in struct attribute_spec.handler. */
1405 static tree
1406 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1407 tree *node;
1408 tree name;
1409 tree args;
1410 int flags ATTRIBUTE_UNUSED;
1411 bool *no_add_attrs;
1412 {
1413 if (TREE_CODE (*node) != FUNCTION_TYPE
1414 && TREE_CODE (*node) != METHOD_TYPE
1415 && TREE_CODE (*node) != FIELD_DECL
1416 && TREE_CODE (*node) != TYPE_DECL)
1417 {
1418 warning ("`%s' attribute only applies to functions",
1419 IDENTIFIER_POINTER (name));
1420 *no_add_attrs = true;
1421 }
1422 else
1423 {
1424 tree cst;
1425
1426 cst = TREE_VALUE (args);
1427 if (TREE_CODE (cst) != INTEGER_CST)
1428 {
1429 warning ("`%s' attribute requires an integer constant argument",
1430 IDENTIFIER_POINTER (name));
1431 *no_add_attrs = true;
1432 }
1433 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1434 {
1435 warning ("argument to `%s' attribute larger than %d",
1436 IDENTIFIER_POINTER (name), REGPARM_MAX);
1437 *no_add_attrs = true;
1438 }
1439 }
1440
1441 return NULL_TREE;
1442 }
1443
1444 /* Return 0 if the attributes for two types are incompatible, 1 if they
1445 are compatible, and 2 if they are nearly compatible (which causes a
1446 warning to be generated). */
1447
1448 static int
1449 ix86_comp_type_attributes (type1, type2)
1450 tree type1;
1451 tree type2;
1452 {
1453 /* Check for mismatch of non-default calling convention. */
1454 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1455
1456 if (TREE_CODE (type1) != FUNCTION_TYPE)
1457 return 1;
1458
1459 /* Check for mismatched return types (cdecl vs stdcall). */
1460 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1461 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1462 return 0;
1463 return 1;
1464 }
1465 \f
1466 /* Return the regparm value for a function with the indicated TYPE.  */
1467
1468 static int
1469 ix86_fntype_regparm (type)
1470 tree type;
1471 {
1472 tree attr;
1473
1474 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1475 if (attr)
1476 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1477 else
1478 return ix86_regparm;
1479 }
1480
1481 /* Value is the number of bytes of arguments automatically
1482 popped when returning from a subroutine call.
1483 FUNDECL is the declaration node of the function (as a tree),
1484 FUNTYPE is the data type of the function (as a tree),
1485 or for a library call it is an identifier node for the subroutine name.
1486 SIZE is the number of bytes of arguments passed on the stack.
1487
1488 On the 80386, the RTD insn may be used to pop them if the number
1489 of args is fixed, but if the number is variable then the caller
1490 must pop them all. RTD can't be used for library calls now
1491 because the library is compiled with the Unix compiler.
1492 Use of RTD is a selectable option, since it is incompatible with
1493 standard Unix calling sequences. If the option is not selected,
1494 the caller must always pop the args.
1495
1496 The attribute stdcall is equivalent to RTD on a per module basis. */
1497
1498 int
1499 ix86_return_pops_args (fundecl, funtype, size)
1500 tree fundecl;
1501 tree funtype;
1502 int size;
1503 {
1504 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1505
1506 /* Cdecl functions override -mrtd, and never pop the stack. */
1507 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1508
1509 /* Stdcall functions will pop the stack if not variable args. */
1510 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1511 rtd = 1;
1512
1513 if (rtd
1514 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1515 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1516 == void_type_node)))
1517 return size;
1518 }
1519
1520 /* Lose any fake structure return argument if it is passed on the stack. */
1521 if (aggregate_value_p (TREE_TYPE (funtype))
1522 && !TARGET_64BIT)
1523 {
1524 int nregs = ix86_fntype_regparm (funtype);
1525
1526 if (!nregs)
1527 return GET_MODE_SIZE (Pmode);
1528 }
1529
1530 return 0;
1531 }
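
/* Illustrative sketch (added; not part of the original sources): with -mrtd
   or the stdcall attribute, a fixed-argument callee such as

     void __attribute__ ((stdcall)) f (int a, int b);

   pops its own 8 bytes of arguments (the function above returns SIZE), while
   a cdecl or variadic callee returns 0 and leaves the cleanup to the caller.  */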
1532 \f
1533 /* Argument support functions. */
1534
1535 /* Return true when register may be used to pass function parameters. */
1536 bool
1537 ix86_function_arg_regno_p (regno)
1538 int regno;
1539 {
1540 int i;
1541 if (!TARGET_64BIT)
1542 return (regno < REGPARM_MAX
1543 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1544 if (SSE_REGNO_P (regno) && TARGET_SSE)
1545 return true;
1546 /* RAX is used as a hidden argument to va_arg functions. */
1547 if (!regno)
1548 return true;
1549 for (i = 0; i < REGPARM_MAX; i++)
1550 if (regno == x86_64_int_parameter_registers[i])
1551 return true;
1552 return false;
1553 }
1554
1555 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1556 for a call to a function whose data type is FNTYPE.
1557 For a library call, FNTYPE is 0. */
1558
1559 void
1560 init_cumulative_args (cum, fntype, libname)
1561 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1562 tree fntype; /* tree ptr for function decl */
1563 rtx libname; /* SYMBOL_REF of library name or 0 */
1564 {
1565 static CUMULATIVE_ARGS zero_cum;
1566 tree param, next_param;
1567
1568 if (TARGET_DEBUG_ARG)
1569 {
1570 fprintf (stderr, "\ninit_cumulative_args (");
1571 if (fntype)
1572 fprintf (stderr, "fntype code = %s, ret code = %s",
1573 tree_code_name[(int) TREE_CODE (fntype)],
1574 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1575 else
1576 fprintf (stderr, "no fntype");
1577
1578 if (libname)
1579 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1580 }
1581
1582 *cum = zero_cum;
1583
1584 /* Set up the number of registers to use for passing arguments. */
1585 cum->nregs = ix86_regparm;
1586 cum->sse_nregs = SSE_REGPARM_MAX;
1587 if (fntype && !TARGET_64BIT)
1588 {
1589 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1590
1591 if (attr)
1592 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1593 }
1594 cum->maybe_vaarg = false;
1595
1596 /* Determine if this function has variable arguments. This is
1597 indicated by the last argument being 'void_type_node' if there
1598 are no variable arguments. If there are variable arguments, then
1599 we won't pass anything in registers.  */
1600
1601 if (cum->nregs)
1602 {
1603 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1604 param != 0; param = next_param)
1605 {
1606 next_param = TREE_CHAIN (param);
1607 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1608 {
1609 if (!TARGET_64BIT)
1610 cum->nregs = 0;
1611 cum->maybe_vaarg = true;
1612 }
1613 }
1614 }
1615 if ((!fntype && !libname)
1616 || (fntype && !TYPE_ARG_TYPES (fntype)))
1617 cum->maybe_vaarg = 1;
1618
1619 if (TARGET_DEBUG_ARG)
1620 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1621
1622 return;
1623 }
1624
1625 /* x86-64 register passing implementation. See the x86-64 ABI for details.
1626 The goal of this code is to classify each 8-byte chunk of an incoming
1627 argument by register class and assign registers accordingly. */
1628
1629 /* Return the union class of CLASS1 and CLASS2.
1630 See the x86-64 PS ABI for details. */
1631
1632 static enum x86_64_reg_class
1633 merge_classes (class1, class2)
1634 enum x86_64_reg_class class1, class2;
1635 {
1636 /* Rule #1: If both classes are equal, this is the resulting class. */
1637 if (class1 == class2)
1638 return class1;
1639
1640 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1641 the other class. */
1642 if (class1 == X86_64_NO_CLASS)
1643 return class2;
1644 if (class2 == X86_64_NO_CLASS)
1645 return class1;
1646
1647 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1648 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1649 return X86_64_MEMORY_CLASS;
1650
1651 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1652 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1653 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1654 return X86_64_INTEGERSI_CLASS;
1655 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1656 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1657 return X86_64_INTEGER_CLASS;
1658
1659 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1660 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1661 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1662 return X86_64_MEMORY_CLASS;
1663
1664 /* Rule #6: Otherwise class SSE is used. */
1665 return X86_64_SSE_CLASS;
1666 }
1667
1668 /* Classify the argument of type TYPE and mode MODE.
1669 CLASSES will be filled by the register class used to pass each word
1670 of the operand. The number of words is returned. In case the parameter
1671 should be passed in memory, 0 is returned. As a special case for zero
1672 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1673
1674 BIT_OFFSET is used internally for handling records; it gives the offset
1675 of the operand within the enclosing record, in bits, taken modulo 256
1676 to avoid overflow cases.
1676
1677 See the x86-64 PS ABI for details.
1678 */
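
/* Worked example (added for illustration; not part of the original sources):
   for

     struct s { int i; int j; double d; };

   the function below produces two 8-byte chunks: the two ints merge into
   X86_64_INTEGER_CLASS and the double classifies as X86_64_SSEDF_CLASS, so
   the value is passed in one integer register and one SSE register, as the
   x86-64 psABI prescribes.  */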
1679
1680 static int
1681 classify_argument (mode, type, classes, bit_offset)
1682 enum machine_mode mode;
1683 tree type;
1684 enum x86_64_reg_class classes[MAX_CLASSES];
1685 int bit_offset;
1686 {
1687 int bytes =
1688 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1689 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1690
1691 /* Variable sized entities are always passed/returned in memory. */
1692 if (bytes < 0)
1693 return 0;
1694
1695 if (type && AGGREGATE_TYPE_P (type))
1696 {
1697 int i;
1698 tree field;
1699 enum x86_64_reg_class subclasses[MAX_CLASSES];
1700
1701 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1702 if (bytes > 16)
1703 return 0;
1704
1705 for (i = 0; i < words; i++)
1706 classes[i] = X86_64_NO_CLASS;
1707
1708 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1709 signal the memory class, so handle this as a special case. */
1710 if (!words)
1711 {
1712 classes[0] = X86_64_NO_CLASS;
1713 return 1;
1714 }
1715
1716 /* Classify each field of record and merge classes. */
1717 if (TREE_CODE (type) == RECORD_TYPE)
1718 {
1719 /* For C++ classes, first merge in the fields of the base classes. */
1720 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1721 {
1722 tree bases = TYPE_BINFO_BASETYPES (type);
1723 int n_bases = TREE_VEC_LENGTH (bases);
1724 int i;
1725
1726 for (i = 0; i < n_bases; ++i)
1727 {
1728 tree binfo = TREE_VEC_ELT (bases, i);
1729 int num;
1730 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1731 tree type = BINFO_TYPE (binfo);
1732
1733 num = classify_argument (TYPE_MODE (type),
1734 type, subclasses,
1735 (offset + bit_offset) % 256);
1736 if (!num)
1737 return 0;
1738 for (i = 0; i < num; i++)
1739 {
1740 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1741 classes[i + pos] =
1742 merge_classes (subclasses[i], classes[i + pos]);
1743 }
1744 }
1745 }
1746 /* And now merge the fields of the structure. */
1747 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1748 {
1749 if (TREE_CODE (field) == FIELD_DECL)
1750 {
1751 int num;
1752
1753 /* Bitfields are always classified as integer. Handle them
1754 early, since later code would consider them to be
1755 misaligned integers. */
1756 if (DECL_BIT_FIELD (field))
1757 {
1758 for (i = int_bit_position (field) / 8 / 8;
1759 i < (int_bit_position (field)
1760 + tree_low_cst (DECL_SIZE (field), 0)
1761 + 63) / 8 / 8; i++)
1762 classes[i] =
1763 merge_classes (X86_64_INTEGER_CLASS,
1764 classes[i]);
1765 }
1766 else
1767 {
1768 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1769 TREE_TYPE (field), subclasses,
1770 (int_bit_position (field)
1771 + bit_offset) % 256);
1772 if (!num)
1773 return 0;
1774 for (i = 0; i < num; i++)
1775 {
1776 int pos =
1777 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1778 classes[i + pos] =
1779 merge_classes (subclasses[i], classes[i + pos]);
1780 }
1781 }
1782 }
1783 }
1784 }
1785 /* Arrays are handled as small records. */
1786 else if (TREE_CODE (type) == ARRAY_TYPE)
1787 {
1788 int num;
1789 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1790 TREE_TYPE (type), subclasses, bit_offset);
1791 if (!num)
1792 return 0;
1793
1794 /* The partial classes are now full classes. */
1795 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1796 subclasses[0] = X86_64_SSE_CLASS;
1797 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1798 subclasses[0] = X86_64_INTEGER_CLASS;
1799
1800 for (i = 0; i < words; i++)
1801 classes[i] = subclasses[i % num];
1802 }
1803 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1804 else if (TREE_CODE (type) == UNION_TYPE
1805 || TREE_CODE (type) == QUAL_UNION_TYPE)
1806 {
1807 /* For C++ classes, first merge in the fields of the base classes. */
1808 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1809 {
1810 tree bases = TYPE_BINFO_BASETYPES (type);
1811 int n_bases = TREE_VEC_LENGTH (bases);
1812 int i;
1813
1814 for (i = 0; i < n_bases; ++i)
1815 {
1816 tree binfo = TREE_VEC_ELT (bases, i);
1817 int num;
1818 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1819 tree type = BINFO_TYPE (binfo);
1820
1821 num = classify_argument (TYPE_MODE (type),
1822 type, subclasses,
1823 (offset + (bit_offset % 64)) % 256);
1824 if (!num)
1825 return 0;
1826 for (i = 0; i < num; i++)
1827 {
1828 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1829 classes[i + pos] =
1830 merge_classes (subclasses[i], classes[i + pos]);
1831 }
1832 }
1833 }
1834 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1835 {
1836 if (TREE_CODE (field) == FIELD_DECL)
1837 {
1838 int num;
1839 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1840 TREE_TYPE (field), subclasses,
1841 bit_offset);
1842 if (!num)
1843 return 0;
1844 for (i = 0; i < num; i++)
1845 classes[i] = merge_classes (subclasses[i], classes[i]);
1846 }
1847 }
1848 }
1849 else
1850 abort ();
1851
1852 /* Final merger cleanup. */
1853 for (i = 0; i < words; i++)
1854 {
1855 /* If one class is MEMORY, everything should be passed in
1856 memory. */
1857 if (classes[i] == X86_64_MEMORY_CLASS)
1858 return 0;
1859
1860 /* X86_64_SSEUP_CLASS should always be preceded by
1861 X86_64_SSE_CLASS. */
1862 if (classes[i] == X86_64_SSEUP_CLASS
1863 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1864 classes[i] = X86_64_SSE_CLASS;
1865
1866 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1867 if (classes[i] == X86_64_X87UP_CLASS
1868 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1869 classes[i] = X86_64_SSE_CLASS;
1870 }
1871 return words;
1872 }
1873
1874 /* Compute the alignment needed. We align all types to their natural
1875 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
1876 if (mode != VOIDmode && mode != BLKmode)
1877 {
1878 int mode_alignment = GET_MODE_BITSIZE (mode);
1879
1880 if (mode == XFmode)
1881 mode_alignment = 128;
1882 else if (mode == XCmode)
1883 mode_alignment = 256;
1884 /* Misaligned fields are always returned in memory. */
1885 if (bit_offset % mode_alignment)
1886 return 0;
1887 }
1888
1889 /* Classification of atomic types. */
1890 switch (mode)
1891 {
1892 case DImode:
1893 case SImode:
1894 case HImode:
1895 case QImode:
1896 case CSImode:
1897 case CHImode:
1898 case CQImode:
1899 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1900 classes[0] = X86_64_INTEGERSI_CLASS;
1901 else
1902 classes[0] = X86_64_INTEGER_CLASS;
1903 return 1;
1904 case CDImode:
1905 case TImode:
1906 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1907 return 2;
1908 case CTImode:
1909 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1910 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1911 return 4;
1912 case SFmode:
1913 if (!(bit_offset % 64))
1914 classes[0] = X86_64_SSESF_CLASS;
1915 else
1916 classes[0] = X86_64_SSE_CLASS;
1917 return 1;
1918 case DFmode:
1919 classes[0] = X86_64_SSEDF_CLASS;
1920 return 1;
1921 case TFmode:
1922 classes[0] = X86_64_X87_CLASS;
1923 classes[1] = X86_64_X87UP_CLASS;
1924 return 2;
1925 case TCmode:
1926 classes[0] = X86_64_X87_CLASS;
1927 classes[1] = X86_64_X87UP_CLASS;
1928 classes[2] = X86_64_X87_CLASS;
1929 classes[3] = X86_64_X87UP_CLASS;
1930 return 4;
1931 case DCmode:
1932 classes[0] = X86_64_SSEDF_CLASS;
1933 classes[1] = X86_64_SSEDF_CLASS;
1934 return 2;
1935 case SCmode:
1936 classes[0] = X86_64_SSE_CLASS;
1937 return 1;
1938 case V4SFmode:
1939 case V4SImode:
1940 case V16QImode:
1941 case V8HImode:
1942 case V2DFmode:
1943 case V2DImode:
1944 classes[0] = X86_64_SSE_CLASS;
1945 classes[1] = X86_64_SSEUP_CLASS;
1946 return 2;
1947 case V2SFmode:
1948 case V2SImode:
1949 case V4HImode:
1950 case V8QImode:
1951 return 0;
1952 case BLKmode:
1953 case VOIDmode:
1954 return 0;
1955 default:
1956 abort ();
1957 }
1958 }
1959
1960 /* Examine the argument and set the number of registers required in each
1961 class. Return 0 iff the parameter should be passed in memory. */
1962 static int
1963 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1964 enum machine_mode mode;
1965 tree type;
1966 int *int_nregs, *sse_nregs;
1967 int in_return;
1968 {
1969 enum x86_64_reg_class class[MAX_CLASSES];
1970 int n = classify_argument (mode, type, class, 0);
1971
1972 *int_nregs = 0;
1973 *sse_nregs = 0;
1974 if (!n)
1975 return 0;
1976 for (n--; n >= 0; n--)
1977 switch (class[n])
1978 {
1979 case X86_64_INTEGER_CLASS:
1980 case X86_64_INTEGERSI_CLASS:
1981 (*int_nregs)++;
1982 break;
1983 case X86_64_SSE_CLASS:
1984 case X86_64_SSESF_CLASS:
1985 case X86_64_SSEDF_CLASS:
1986 (*sse_nregs)++;
1987 break;
1988 case X86_64_NO_CLASS:
1989 case X86_64_SSEUP_CLASS:
1990 break;
1991 case X86_64_X87_CLASS:
1992 case X86_64_X87UP_CLASS:
1993 if (!in_return)
1994 return 0;
1995 break;
1996 case X86_64_MEMORY_CLASS:
1997 abort ();
1998 }
1999 return 1;
2000 }
2001 /* Construct the container for the argument as used by the GCC interface.
2002 See FUNCTION_ARG for the detailed description. */
2003 static rtx
2004 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2005 enum machine_mode mode;
2006 tree type;
2007 int in_return;
2008 int nintregs, nsseregs;
2009 const int * intreg;
2010 int sse_regno;
2011 {
2012 enum machine_mode tmpmode;
2013 int bytes =
2014 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2015 enum x86_64_reg_class class[MAX_CLASSES];
2016 int n;
2017 int i;
2018 int nexps = 0;
2019 int needed_sseregs, needed_intregs;
2020 rtx exp[MAX_CLASSES];
2021 rtx ret;
2022
2023 n = classify_argument (mode, type, class, 0);
2024 if (TARGET_DEBUG_ARG)
2025 {
2026 if (!n)
2027 fprintf (stderr, "Memory class\n");
2028 else
2029 {
2030 fprintf (stderr, "Classes:");
2031 for (i = 0; i < n; i++)
2032 {
2033 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2034 }
2035 fprintf (stderr, "\n");
2036 }
2037 }
2038 if (!n)
2039 return NULL;
2040 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2041 return NULL;
2042 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2043 return NULL;
2044
2045 /* First construct simple cases. Avoid SCmode, since we want to use
2046 a single register to pass this type. */
2047 if (n == 1 && mode != SCmode)
2048 switch (class[0])
2049 {
2050 case X86_64_INTEGER_CLASS:
2051 case X86_64_INTEGERSI_CLASS:
2052 return gen_rtx_REG (mode, intreg[0]);
2053 case X86_64_SSE_CLASS:
2054 case X86_64_SSESF_CLASS:
2055 case X86_64_SSEDF_CLASS:
2056 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2057 case X86_64_X87_CLASS:
2058 return gen_rtx_REG (mode, FIRST_STACK_REG);
2059 case X86_64_NO_CLASS:
2060 /* Zero sized array, struct or class. */
2061 return NULL;
2062 default:
2063 abort ();
2064 }
2065 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2066 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2067 if (n == 2
2068 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2069 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2070 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2071 && class[1] == X86_64_INTEGER_CLASS
2072 && (mode == CDImode || mode == TImode)
2073 && intreg[0] + 1 == intreg[1])
2074 return gen_rtx_REG (mode, intreg[0]);
2075 if (n == 4
2076 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2077 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2078 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2079
2080 /* Otherwise figure out the entries of the PARALLEL. */
2081 for (i = 0; i < n; i++)
2082 {
2083 switch (class[i])
2084 {
2085 case X86_64_NO_CLASS:
2086 break;
2087 case X86_64_INTEGER_CLASS:
2088 case X86_64_INTEGERSI_CLASS:
2089 /* Merge TImodes on aligned occasions here too. */
2090 if (i * 8 + 8 > bytes)
2091 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2092 else if (class[i] == X86_64_INTEGERSI_CLASS)
2093 tmpmode = SImode;
2094 else
2095 tmpmode = DImode;
2096 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2097 if (tmpmode == BLKmode)
2098 tmpmode = DImode;
2099 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2100 gen_rtx_REG (tmpmode, *intreg),
2101 GEN_INT (i*8));
2102 intreg++;
2103 break;
2104 case X86_64_SSESF_CLASS:
2105 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2106 gen_rtx_REG (SFmode,
2107 SSE_REGNO (sse_regno)),
2108 GEN_INT (i*8));
2109 sse_regno++;
2110 break;
2111 case X86_64_SSEDF_CLASS:
2112 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2113 gen_rtx_REG (DFmode,
2114 SSE_REGNO (sse_regno)),
2115 GEN_INT (i*8));
2116 sse_regno++;
2117 break;
2118 case X86_64_SSE_CLASS:
2119 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2120 tmpmode = TImode;
2121 else
2122 tmpmode = DImode;
2123 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2124 gen_rtx_REG (tmpmode,
2125 SSE_REGNO (sse_regno)),
2126 GEN_INT (i*8));
2127 if (tmpmode == TImode)
2128 i++;
2129 sse_regno++;
2130 break;
2131 default:
2132 abort ();
2133 }
2134 }
2135 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2136 for (i = 0; i < nexps; i++)
2137 XVECEXP (ret, 0, i) = exp [i];
2138 return ret;
2139 }
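
/* Worked example (added for illustration; not part of the original sources):
   passing  struct { double d; long l; }  as the first argument classifies
   as { SSEDF, INTEGER }, so the container built above is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi.  */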
2140
2141 /* Update the data in CUM to advance over an argument
2142 of mode MODE and data type TYPE.
2143 (TYPE is null for libcalls where that information may not be available.) */
2144
2145 void
2146 function_arg_advance (cum, mode, type, named)
2147 CUMULATIVE_ARGS *cum; /* current arg information */
2148 enum machine_mode mode; /* current arg mode */
2149 tree type; /* type of the argument or 0 if lib support */
2150 int named; /* whether or not the argument was named */
2151 {
2152 int bytes =
2153 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2154 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2155
2156 if (TARGET_DEBUG_ARG)
2157 fprintf (stderr,
2158 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2159 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2160 if (TARGET_64BIT)
2161 {
2162 int int_nregs, sse_nregs;
2163 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2164 cum->words += words;
2165 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2166 {
2167 cum->nregs -= int_nregs;
2168 cum->sse_nregs -= sse_nregs;
2169 cum->regno += int_nregs;
2170 cum->sse_regno += sse_nregs;
2171 }
2172 else
2173 cum->words += words;
2174 }
2175 else
2176 {
2177 if (TARGET_SSE && mode == TImode)
2178 {
2179 cum->sse_words += words;
2180 cum->sse_nregs -= 1;
2181 cum->sse_regno += 1;
2182 if (cum->sse_nregs <= 0)
2183 {
2184 cum->sse_nregs = 0;
2185 cum->sse_regno = 0;
2186 }
2187 }
2188 else
2189 {
2190 cum->words += words;
2191 cum->nregs -= words;
2192 cum->regno += words;
2193
2194 if (cum->nregs <= 0)
2195 {
2196 cum->nregs = 0;
2197 cum->regno = 0;
2198 }
2199 }
2200 }
2201 return;
2202 }
2203
2204 /* Define where to put the arguments to a function.
2205 Value is zero to push the argument on the stack,
2206 or a hard register in which to store the argument.
2207
2208 MODE is the argument's machine mode.
2209 TYPE is the data type of the argument (as a tree).
2210 This is null for libcalls where that information may
2211 not be available.
2212 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2213 the preceding args and about the function being called.
2214 NAMED is nonzero if this argument is a named parameter
2215 (otherwise it is an extra parameter matching an ellipsis). */
2216
2217 rtx
2218 function_arg (cum, mode, type, named)
2219 CUMULATIVE_ARGS *cum; /* current arg information */
2220 enum machine_mode mode; /* current arg mode */
2221 tree type; /* type of the argument or 0 if lib support */
2222 int named; /* != 0 for normal args, == 0 for ... args */
2223 {
2224 rtx ret = NULL_RTX;
2225 int bytes =
2226 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2227 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2228
2229 /* Handle a hidden AL argument containing the number of registers for
2230 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2231 avoid any AL settings. */
2232 if (mode == VOIDmode)
2233 {
2234 if (TARGET_64BIT)
2235 return GEN_INT (cum->maybe_vaarg
2236 ? (cum->sse_nregs < 0
2237 ? SSE_REGPARM_MAX
2238 : cum->sse_regno)
2239 : -1);
2240 else
2241 return constm1_rtx;
2242 }
2243 if (TARGET_64BIT)
2244 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2245 &x86_64_int_parameter_registers [cum->regno],
2246 cum->sse_regno);
2247 else
2248 switch (mode)
2249 {
2250 /* For now, pass fp/complex values on the stack. */
2251 default:
2252 break;
2253
2254 case BLKmode:
2255 case DImode:
2256 case SImode:
2257 case HImode:
2258 case QImode:
2259 if (words <= cum->nregs)
2260 ret = gen_rtx_REG (mode, cum->regno);
2261 break;
2262 case TImode:
2263 if (cum->sse_nregs)
2264 ret = gen_rtx_REG (mode, cum->sse_regno);
2265 break;
2266 }
2267
2268 if (TARGET_DEBUG_ARG)
2269 {
2270 fprintf (stderr,
2271 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2272 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2273
2274 if (ret)
2275 print_simple_rtl (stderr, ret);
2276 else
2277 fprintf (stderr, ", stack");
2278
2279 fprintf (stderr, " )\n");
2280 }
2281
2282 return ret;
2283 }
2284
2285 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2286 and type. */
2287
2288 int
2289 ix86_function_arg_boundary (mode, type)
2290 enum machine_mode mode;
2291 tree type;
2292 {
2293 int align;
2294 if (!TARGET_64BIT)
2295 return PARM_BOUNDARY;
2296 if (type)
2297 align = TYPE_ALIGN (type);
2298 else
2299 align = GET_MODE_ALIGNMENT (mode);
2300 if (align < PARM_BOUNDARY)
2301 align = PARM_BOUNDARY;
2302 if (align > 128)
2303 align = 128;
2304 return align;
2305 }
2306
2307 /* Return true if N is a possible register number of function value. */
2308 bool
2309 ix86_function_value_regno_p (regno)
2310 int regno;
2311 {
2312 if (!TARGET_64BIT)
2313 {
2314 return ((regno) == 0
2315 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2316 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2317 }
2318 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2319 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2320 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2321 }
2322
2323 /* Define how to find the value returned by a function.
2324 VALTYPE is the data type of the value (as a tree).
2325 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2326 otherwise, FUNC is 0. */
2327 rtx
2328 ix86_function_value (valtype)
2329 tree valtype;
2330 {
2331 if (TARGET_64BIT)
2332 {
2333 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2334 REGPARM_MAX, SSE_REGPARM_MAX,
2335 x86_64_int_return_registers, 0);
2336 /* For zero sized structures, construct_container returns NULL, but we
2337 need to keep the rest of the compiler happy by returning a meaningful value. */
2338 if (!ret)
2339 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2340 return ret;
2341 }
2342 else
2343 return gen_rtx_REG (TYPE_MODE (valtype),
2344 ix86_value_regno (TYPE_MODE (valtype)));
2345 }
2346
2347 /* Return nonzero iff TYPE is returned in memory. */
2348 int
2349 ix86_return_in_memory (type)
2350 tree type;
2351 {
2352 int needed_intregs, needed_sseregs;
2353 if (TARGET_64BIT)
2354 {
2355 return !examine_argument (TYPE_MODE (type), type, 1,
2356 &needed_intregs, &needed_sseregs);
2357 }
2358 else
2359 {
2360 if (TYPE_MODE (type) == BLKmode
2361 || (VECTOR_MODE_P (TYPE_MODE (type))
2362 && int_size_in_bytes (type) == 8)
2363 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2364 && TYPE_MODE (type) != TFmode
2365 && !VECTOR_MODE_P (TYPE_MODE (type))))
2366 return 1;
2367 return 0;
2368 }
2369 }
2370
2371 /* Define how to find the value returned by a library function
2372 assuming the value has mode MODE. */
2373 rtx
2374 ix86_libcall_value (mode)
2375 enum machine_mode mode;
2376 {
2377 if (TARGET_64BIT)
2378 {
2379 switch (mode)
2380 {
2381 case SFmode:
2382 case SCmode:
2383 case DFmode:
2384 case DCmode:
2385 return gen_rtx_REG (mode, FIRST_SSE_REG);
2386 case TFmode:
2387 case TCmode:
2388 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2389 default:
2390 return gen_rtx_REG (mode, 0);
2391 }
2392 }
2393 else
2394 return gen_rtx_REG (mode, ix86_value_regno (mode));
2395 }
2396
2397 /* Given a mode, return the register to use for a return value. */
2398
2399 static int
2400 ix86_value_regno (mode)
2401 enum machine_mode mode;
2402 {
2403 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2404 return FIRST_FLOAT_REG;
2405 if (mode == TImode || VECTOR_MODE_P (mode))
2406 return FIRST_SSE_REG;
2407 return 0;
2408 }
2409 \f
2410 /* Create the va_list data type. */
2411
2412 tree
2413 ix86_build_va_list ()
2414 {
2415 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2416
2417 /* For i386 we use a plain pointer to the argument area. */
2418 if (!TARGET_64BIT)
2419 return build_pointer_type (char_type_node);
2420
2421 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2422 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2423
2424 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2425 unsigned_type_node);
2426 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2427 unsigned_type_node);
2428 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2429 ptr_type_node);
2430 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2431 ptr_type_node);
2432
2433 DECL_FIELD_CONTEXT (f_gpr) = record;
2434 DECL_FIELD_CONTEXT (f_fpr) = record;
2435 DECL_FIELD_CONTEXT (f_ovf) = record;
2436 DECL_FIELD_CONTEXT (f_sav) = record;
2437
2438 TREE_CHAIN (record) = type_decl;
2439 TYPE_NAME (record) = type_decl;
2440 TYPE_FIELDS (record) = f_gpr;
2441 TREE_CHAIN (f_gpr) = f_fpr;
2442 TREE_CHAIN (f_fpr) = f_ovf;
2443 TREE_CHAIN (f_ovf) = f_sav;
2444
2445 layout_type (record);
2446
2447 /* The correct type is an array type of one element. */
2448 return build_array_type (record, build_index_type (size_zero_node));
2449 }
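
/* For reference (added; not part of the original sources), the record built
   above corresponds to the layout required by the x86-64 psABI:

     struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag va_list[1];

   gp_offset and fp_offset index into reg_save_area, overflow_arg_area points
   at arguments passed on the stack, and reg_save_area is filled by the
   prologue code emitted in ix86_setup_incoming_varargs.  */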
2450
2451 /* Perform any actions needed for a function that is receiving a
2452 variable number of arguments.
2453
2454 CUM is as above.
2455
2456 MODE and TYPE are the mode and type of the current parameter.
2457
2458 PRETEND_SIZE is a variable that should be set to the amount of stack
2459 that must be pushed by the prolog to pretend that our caller pushed
2460 it.
2461
2462 Normally, this macro will push all remaining incoming registers on the
2463 stack and set PRETEND_SIZE to the length of the registers pushed. */
2464
2465 void
2466 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2467 CUMULATIVE_ARGS *cum;
2468 enum machine_mode mode;
2469 tree type;
2470 int *pretend_size ATTRIBUTE_UNUSED;
2471 int no_rtl;
2472
2473 {
2474 CUMULATIVE_ARGS next_cum;
2475 rtx save_area = NULL_RTX, mem;
2476 rtx label;
2477 rtx label_ref;
2478 rtx tmp_reg;
2479 rtx nsse_reg;
2480 int set;
2481 tree fntype;
2482 int stdarg_p;
2483 int i;
2484
2485 if (!TARGET_64BIT)
2486 return;
2487
2488 /* Indicate that space must be allocated on the stack for the varargs save area. */
2489 ix86_save_varrargs_registers = 1;
2490
2491 fntype = TREE_TYPE (current_function_decl);
2492 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2493 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2494 != void_type_node));
2495
2496 /* For varargs, we do not want to skip the dummy va_dcl argument.
2497 For stdargs, we do want to skip the last named argument. */
2498 next_cum = *cum;
2499 if (stdarg_p)
2500 function_arg_advance (&next_cum, mode, type, 1);
2501
2502 if (!no_rtl)
2503 save_area = frame_pointer_rtx;
2504
2505 set = get_varargs_alias_set ();
2506
2507 for (i = next_cum.regno; i < ix86_regparm; i++)
2508 {
2509 mem = gen_rtx_MEM (Pmode,
2510 plus_constant (save_area, i * UNITS_PER_WORD));
2511 set_mem_alias_set (mem, set);
2512 emit_move_insn (mem, gen_rtx_REG (Pmode,
2513 x86_64_int_parameter_registers[i]));
2514 }
2515
2516 if (next_cum.sse_nregs)
2517 {
2518 /* Now emit code to save SSE registers. The AX parameter contains the
2519 number of SSE parameter registers used to call this function. We use
2520 the sse_prologue_save insn template, which produces a computed jump
2521 across the SSE saves. We need some preparation work to get this working. */
2522
2523 label = gen_label_rtx ();
2524 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2525
2526 /* Compute address to jump to :
2527 label - 5*eax + nnamed_sse_arguments*5 */
2528 tmp_reg = gen_reg_rtx (Pmode);
2529 nsse_reg = gen_reg_rtx (Pmode);
2530 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2531 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2532 gen_rtx_MULT (Pmode, nsse_reg,
2533 GEN_INT (4))));
2534 if (next_cum.sse_regno)
2535 emit_move_insn
2536 (nsse_reg,
2537 gen_rtx_CONST (DImode,
2538 gen_rtx_PLUS (DImode,
2539 label_ref,
2540 GEN_INT (next_cum.sse_regno * 4))));
2541 else
2542 emit_move_insn (nsse_reg, label_ref);
2543 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2544
2545 /* Compute the address of the memory block we save into. We always use a
2546 pointer pointing 127 bytes after the first byte to store - this is
2547 needed to keep the instruction size limited to 4 bytes. */
2548 tmp_reg = gen_reg_rtx (Pmode);
2549 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2550 plus_constant (save_area,
2551 8 * REGPARM_MAX + 127)));
2552 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2553 set_mem_alias_set (mem, set);
2554 set_mem_align (mem, BITS_PER_WORD);
2555
2556 /* And finally do the dirty job! */
2557 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2558 GEN_INT (next_cum.sse_regno), label));
2559 }
2560
2561 }
2562
2563 /* Implement va_start. */
2564
2565 void
2566 ix86_va_start (valist, nextarg)
2567 tree valist;
2568 rtx nextarg;
2569 {
2570 HOST_WIDE_INT words, n_gpr, n_fpr;
2571 tree f_gpr, f_fpr, f_ovf, f_sav;
2572 tree gpr, fpr, ovf, sav, t;
2573
2574 /* Only the 64-bit target needs something special. */
2575 if (!TARGET_64BIT)
2576 {
2577 std_expand_builtin_va_start (valist, nextarg);
2578 return;
2579 }
2580
2581 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2582 f_fpr = TREE_CHAIN (f_gpr);
2583 f_ovf = TREE_CHAIN (f_fpr);
2584 f_sav = TREE_CHAIN (f_ovf);
2585
2586 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2587 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2588 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2589 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2590 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2591
2592 /* Count number of gp and fp argument registers used. */
2593 words = current_function_args_info.words;
2594 n_gpr = current_function_args_info.regno;
2595 n_fpr = current_function_args_info.sse_regno;
2596
2597 if (TARGET_DEBUG_ARG)
2598 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2599 (int) words, (int) n_gpr, (int) n_fpr);
2600
2601 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2602 build_int_2 (n_gpr * 8, 0));
2603 TREE_SIDE_EFFECTS (t) = 1;
2604 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2605
2606 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2607 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2608 TREE_SIDE_EFFECTS (t) = 1;
2609 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2610
2611 /* Find the overflow area. */
2612 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2613 if (words != 0)
2614 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2615 build_int_2 (words * UNITS_PER_WORD, 0));
2616 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2617 TREE_SIDE_EFFECTS (t) = 1;
2618 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2619
2620 /* Find the register save area.
2621 The prologue of the function saves it right above the stack frame. */
2622 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2623 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2624 TREE_SIDE_EFFECTS (t) = 1;
2625 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2626 }
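
/* Illustrative note (added; not part of the original sources): for a
   hypothetical callee

     void logmsg (const char *fmt, ...);

   one named pointer argument arrives in a general register, so the expansion
   above initializes gp_offset to 1*8, fp_offset to 8*REGPARM_MAX + 0*16,
   overflow_arg_area to the first stack-passed argument, and reg_save_area to
   the block saved by the prologue.  */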
2627
2628 /* Implement va_arg. */
2629 rtx
2630 ix86_va_arg (valist, type)
2631 tree valist, type;
2632 {
2633 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2634 tree f_gpr, f_fpr, f_ovf, f_sav;
2635 tree gpr, fpr, ovf, sav, t;
2636 int size, rsize;
2637 rtx lab_false, lab_over = NULL_RTX;
2638 rtx addr_rtx, r;
2639 rtx container;
2640
2641 /* Only the 64-bit target needs something special. */
2642 if (!TARGET_64BIT)
2643 {
2644 return std_expand_builtin_va_arg (valist, type);
2645 }
2646
2647 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2648 f_fpr = TREE_CHAIN (f_gpr);
2649 f_ovf = TREE_CHAIN (f_fpr);
2650 f_sav = TREE_CHAIN (f_ovf);
2651
2652 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2653 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2654 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2655 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2656 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2657
2658 size = int_size_in_bytes (type);
2659 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2660
2661 container = construct_container (TYPE_MODE (type), type, 0,
2662 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2663 /*
2664 * Pull the value out of the saved registers ...
2665 */
2666
2667 addr_rtx = gen_reg_rtx (Pmode);
2668
2669 if (container)
2670 {
2671 rtx int_addr_rtx, sse_addr_rtx;
2672 int needed_intregs, needed_sseregs;
2673 int need_temp;
2674
2675 lab_over = gen_label_rtx ();
2676 lab_false = gen_label_rtx ();
2677
2678 examine_argument (TYPE_MODE (type), type, 0,
2679 &needed_intregs, &needed_sseregs);
2680
2681
2682 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2683 || TYPE_ALIGN (type) > 128);
2684
2685 /* In case we are passing a structure, verify that it is a consecutive
2686 block in the register save area. If not, we need to do moves. */
2687 if (!need_temp && !REG_P (container))
2688 {
2689 /* Verify that all registers are strictly consecutive. */
2690 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2691 {
2692 int i;
2693
2694 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2695 {
2696 rtx slot = XVECEXP (container, 0, i);
2697 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2698 || INTVAL (XEXP (slot, 1)) != i * 16)
2699 need_temp = 1;
2700 }
2701 }
2702 else
2703 {
2704 int i;
2705
2706 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2707 {
2708 rtx slot = XVECEXP (container, 0, i);
2709 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2710 || INTVAL (XEXP (slot, 1)) != i * 8)
2711 need_temp = 1;
2712 }
2713 }
2714 }
2715 if (!need_temp)
2716 {
2717 int_addr_rtx = addr_rtx;
2718 sse_addr_rtx = addr_rtx;
2719 }
2720 else
2721 {
2722 int_addr_rtx = gen_reg_rtx (Pmode);
2723 sse_addr_rtx = gen_reg_rtx (Pmode);
2724 }
2725 /* First ensure that we fit completely in registers. */
2726 if (needed_intregs)
2727 {
2728 emit_cmp_and_jump_insns (expand_expr
2729 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2730 GEN_INT ((REGPARM_MAX - needed_intregs +
2731 1) * 8), GE, const1_rtx, SImode,
2732 1, lab_false);
2733 }
2734 if (needed_sseregs)
2735 {
2736 emit_cmp_and_jump_insns (expand_expr
2737 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2738 GEN_INT ((SSE_REGPARM_MAX -
2739 needed_sseregs + 1) * 16 +
2740 REGPARM_MAX * 8), GE, const1_rtx,
2741 SImode, 1, lab_false);
2742 }
2743
2744 /* Compute index to start of area used for integer regs. */
2745 if (needed_intregs)
2746 {
2747 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2748 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2749 if (r != int_addr_rtx)
2750 emit_move_insn (int_addr_rtx, r);
2751 }
2752 if (needed_sseregs)
2753 {
2754 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2755 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2756 if (r != sse_addr_rtx)
2757 emit_move_insn (sse_addr_rtx, r);
2758 }
2759 if (need_temp)
2760 {
2761 int i;
2762 rtx mem;
2763
2764 /* Never use the memory itself, as it has the alias set. */
2765 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2766 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2767 set_mem_alias_set (mem, get_varargs_alias_set ());
2768 set_mem_align (mem, BITS_PER_UNIT);
2769
2770 for (i = 0; i < XVECLEN (container, 0); i++)
2771 {
2772 rtx slot = XVECEXP (container, 0, i);
2773 rtx reg = XEXP (slot, 0);
2774 enum machine_mode mode = GET_MODE (reg);
2775 rtx src_addr;
2776 rtx src_mem;
2777 int src_offset;
2778 rtx dest_mem;
2779
2780 if (SSE_REGNO_P (REGNO (reg)))
2781 {
2782 src_addr = sse_addr_rtx;
2783 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2784 }
2785 else
2786 {
2787 src_addr = int_addr_rtx;
2788 src_offset = REGNO (reg) * 8;
2789 }
2790 src_mem = gen_rtx_MEM (mode, src_addr);
2791 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2792 src_mem = adjust_address (src_mem, mode, src_offset);
2793 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2794 emit_move_insn (dest_mem, src_mem);
2795 }
2796 }
2797
2798 if (needed_intregs)
2799 {
2800 t =
2801 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2802 build_int_2 (needed_intregs * 8, 0));
2803 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2804 TREE_SIDE_EFFECTS (t) = 1;
2805 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2806 }
2807 if (needed_sseregs)
2808 {
2809 t =
2810 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2811 build_int_2 (needed_sseregs * 16, 0));
2812 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2813 TREE_SIDE_EFFECTS (t) = 1;
2814 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2815 }
2816
2817 emit_jump_insn (gen_jump (lab_over));
2818 emit_barrier ();
2819 emit_label (lab_false);
2820 }
2821
2822 /* ... otherwise out of the overflow area. */
2823
2824 /* Care for on-stack alignment if needed. */
2825 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2826 t = ovf;
2827 else
2828 {
2829 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2830 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2831 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2832 }
2833 t = save_expr (t);
2834
2835 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2836 if (r != addr_rtx)
2837 emit_move_insn (addr_rtx, r);
2838
2839 t =
2840 build (PLUS_EXPR, TREE_TYPE (t), t,
2841 build_int_2 (rsize * UNITS_PER_WORD, 0));
2842 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2843 TREE_SIDE_EFFECTS (t) = 1;
2844 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2845
2846 if (container)
2847 emit_label (lab_over);
2848
2849 return addr_rtx;
2850 }
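
/* Illustrative sketch (added; not part of the original sources): for
   va_arg (ap, double) the RTL emitted above behaves roughly like

     if (ap->fp_offset >= REGPARM_MAX*8 + SSE_REGPARM_MAX*16)
       goto from_stack;
     addr = ap->reg_save_area + ap->fp_offset;
     ap->fp_offset += 16;
     goto done;
   from_stack:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     ... use *(double *) addr ...  */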
2851 \f
2852 /* Return nonzero if OP is either an i387 or SSE fp register. */
2853 int
2854 any_fp_register_operand (op, mode)
2855 rtx op;
2856 enum machine_mode mode ATTRIBUTE_UNUSED;
2857 {
2858 return ANY_FP_REG_P (op);
2859 }
2860
2861 /* Return nonzero if OP is an i387 fp register. */
2862 int
2863 fp_register_operand (op, mode)
2864 rtx op;
2865 enum machine_mode mode ATTRIBUTE_UNUSED;
2866 {
2867 return FP_REG_P (op);
2868 }
2869
2870 /* Return nonzero if OP is a non-fp register_operand. */
2871 int
2872 register_and_not_any_fp_reg_operand (op, mode)
2873 rtx op;
2874 enum machine_mode mode;
2875 {
2876 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2877 }
2878
2879 /* Return nonzero if OP is a register operand other than an
2880 i387 fp register. */
2881 int
2882 register_and_not_fp_reg_operand (op, mode)
2883 rtx op;
2884 enum machine_mode mode;
2885 {
2886 return register_operand (op, mode) && !FP_REG_P (op);
2887 }
2888
2889 /* Return nonzero if OP is general operand representable on x86_64. */
2890
2891 int
2892 x86_64_general_operand (op, mode)
2893 rtx op;
2894 enum machine_mode mode;
2895 {
2896 if (!TARGET_64BIT)
2897 return general_operand (op, mode);
2898 if (nonimmediate_operand (op, mode))
2899 return 1;
2900 return x86_64_sign_extended_value (op);
2901 }
2902
2903 /* Return nonzero if OP is general operand representable on x86_64
2904 as either sign extended or zero extended constant. */
2905
2906 int
2907 x86_64_szext_general_operand (op, mode)
2908 rtx op;
2909 enum machine_mode mode;
2910 {
2911 if (!TARGET_64BIT)
2912 return general_operand (op, mode);
2913 if (nonimmediate_operand (op, mode))
2914 return 1;
2915 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2916 }
2917
2918 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2919
2920 int
2921 x86_64_nonmemory_operand (op, mode)
2922 rtx op;
2923 enum machine_mode mode;
2924 {
2925 if (!TARGET_64BIT)
2926 return nonmemory_operand (op, mode);
2927 if (register_operand (op, mode))
2928 return 1;
2929 return x86_64_sign_extended_value (op);
2930 }
2931
2932 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2933
2934 int
2935 x86_64_movabs_operand (op, mode)
2936 rtx op;
2937 enum machine_mode mode;
2938 {
2939 if (!TARGET_64BIT || !flag_pic)
2940 return nonmemory_operand (op, mode);
2941 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2942 return 1;
2943 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2944 return 1;
2945 return 0;
2946 }
2947
2948 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign- or zero-extended constant. */
2949
2950 int
2951 x86_64_szext_nonmemory_operand (op, mode)
2952 rtx op;
2953 enum machine_mode mode;
2954 {
2955 if (!TARGET_64BIT)
2956 return nonmemory_operand (op, mode);
2957 if (register_operand (op, mode))
2958 return 1;
2959 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2960 }
2961
2962 /* Return nonzero if OP is immediate operand representable on x86_64. */
2963
2964 int
2965 x86_64_immediate_operand (op, mode)
2966 rtx op;
2967 enum machine_mode mode;
2968 {
2969 if (!TARGET_64BIT)
2970 return immediate_operand (op, mode);
2971 return x86_64_sign_extended_value (op);
2972 }
2973
2974 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended constant. */
2975
2976 int
2977 x86_64_zext_immediate_operand (op, mode)
2978 rtx op;
2979 enum machine_mode mode ATTRIBUTE_UNUSED;
2980 {
2981 return x86_64_zero_extended_value (op);
2982 }
2983
2984 /* Return nonzero if OP is (const_int 1), else return zero. */
2985
2986 int
2987 const_int_1_operand (op, mode)
2988 rtx op;
2989 enum machine_mode mode ATTRIBUTE_UNUSED;
2990 {
2991 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2992 }
2993
2994 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2995 for shift & compare patterns, as shifting by 0 does not change flags),
2996 else return zero. */
2997
2998 int
2999 const_int_1_31_operand (op, mode)
3000 rtx op;
3001 enum machine_mode mode ATTRIBUTE_UNUSED;
3002 {
3003 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3004 }
3005
3006 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3007 reference and a constant. */
3008
3009 int
3010 symbolic_operand (op, mode)
3011 register rtx op;
3012 enum machine_mode mode ATTRIBUTE_UNUSED;
3013 {
3014 switch (GET_CODE (op))
3015 {
3016 case SYMBOL_REF:
3017 case LABEL_REF:
3018 return 1;
3019
3020 case CONST:
3021 op = XEXP (op, 0);
3022 if (GET_CODE (op) == SYMBOL_REF
3023 || GET_CODE (op) == LABEL_REF
3024 || (GET_CODE (op) == UNSPEC
3025 && (XINT (op, 1) == UNSPEC_GOT
3026 || XINT (op, 1) == UNSPEC_GOTOFF
3027 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3028 return 1;
3029 if (GET_CODE (op) != PLUS
3030 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3031 return 0;
3032
3033 op = XEXP (op, 0);
3034 if (GET_CODE (op) == SYMBOL_REF
3035 || GET_CODE (op) == LABEL_REF)
3036 return 1;
3037 /* Only @GOTOFF gets offsets. */
3038 if (GET_CODE (op) != UNSPEC
3039 || XINT (op, 1) != UNSPEC_GOTOFF)
3040 return 0;
3041
3042 op = XVECEXP (op, 0, 0);
3043 if (GET_CODE (op) == SYMBOL_REF
3044 || GET_CODE (op) == LABEL_REF)
3045 return 1;
3046 return 0;
3047
3048 default:
3049 return 0;
3050 }
3051 }
3052
3053 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3054
3055 int
3056 pic_symbolic_operand (op, mode)
3057 register rtx op;
3058 enum machine_mode mode ATTRIBUTE_UNUSED;
3059 {
3060 if (GET_CODE (op) != CONST)
3061 return 0;
3062 op = XEXP (op, 0);
3063 if (TARGET_64BIT)
3064 {
3065 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3066 return 1;
3067 }
3068 else
3069 {
3070 if (GET_CODE (op) == UNSPEC)
3071 return 1;
3072 if (GET_CODE (op) != PLUS
3073 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3074 return 0;
3075 op = XEXP (op, 0);
3076 if (GET_CODE (op) == UNSPEC)
3077 return 1;
3078 }
3079 return 0;
3080 }
3081
3082 /* Return true if OP is a symbolic operand that resolves locally. */
3083
3084 static int
3085 local_symbolic_operand (op, mode)
3086 rtx op;
3087 enum machine_mode mode ATTRIBUTE_UNUSED;
3088 {
3089 if (GET_CODE (op) == CONST
3090 && GET_CODE (XEXP (op, 0)) == PLUS
3091 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3092 op = XEXP (XEXP (op, 0), 0);
3093
3094 if (GET_CODE (op) == LABEL_REF)
3095 return 1;
3096
3097 if (GET_CODE (op) != SYMBOL_REF)
3098 return 0;
3099
3100 /* These we've been told are local by varasm and encode_section_info
3101 respectively. */
3102 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3103 return 1;
3104
3105 /* There is, however, a not insubstantial body of code in the rest of
3106 the compiler that assumes it can just stick the results of
3107 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3108 /* ??? This is a hack. Should update the body of the compiler to
3109 always create a DECL and invoke targetm.encode_section_info. */
3110 if (strncmp (XSTR (op, 0), internal_label_prefix,
3111 internal_label_prefix_len) == 0)
3112 return 1;
3113
3114 return 0;
3115 }
3116
3117 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3118
3119 int
3120 tls_symbolic_operand (op, mode)
3121 register rtx op;
3122 enum machine_mode mode ATTRIBUTE_UNUSED;
3123 {
3124 const char *symbol_str;
3125
3126 if (GET_CODE (op) != SYMBOL_REF)
3127 return 0;
3128 symbol_str = XSTR (op, 0);
3129
3130 if (symbol_str[0] != '%')
3131 return 0;
3132 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3133 }
3134
3135 static int
3136 tls_symbolic_operand_1 (op, kind)
3137 rtx op;
3138 enum tls_model kind;
3139 {
3140 const char *symbol_str;
3141
3142 if (GET_CODE (op) != SYMBOL_REF)
3143 return 0;
3144 symbol_str = XSTR (op, 0);
3145
3146 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3147 }
3148
3149 int
3150 global_dynamic_symbolic_operand (op, mode)
3151 register rtx op;
3152 enum machine_mode mode ATTRIBUTE_UNUSED;
3153 {
3154 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3155 }
3156
3157 int
3158 local_dynamic_symbolic_operand (op, mode)
3159 register rtx op;
3160 enum machine_mode mode ATTRIBUTE_UNUSED;
3161 {
3162 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3163 }
3164
3165 int
3166 initial_exec_symbolic_operand (op, mode)
3167 register rtx op;
3168 enum machine_mode mode ATTRIBUTE_UNUSED;
3169 {
3170 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3171 }
3172
3173 int
3174 local_exec_symbolic_operand (op, mode)
3175 register rtx op;
3176 enum machine_mode mode ATTRIBUTE_UNUSED;
3177 {
3178 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3179 }
3180
3181 /* Test for a valid operand for a call instruction. Don't allow the
3182 arg pointer register or virtual regs since they may decay into
3183 reg + const, which the patterns can't handle. */
3184
3185 int
3186 call_insn_operand (op, mode)
3187 rtx op;
3188 enum machine_mode mode ATTRIBUTE_UNUSED;
3189 {
3190 /* Disallow indirect through a virtual register. This leads to
3191 compiler aborts when trying to eliminate them. */
3192 if (GET_CODE (op) == REG
3193 && (op == arg_pointer_rtx
3194 || op == frame_pointer_rtx
3195 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3196 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3197 return 0;
3198
3199 /* Disallow `call 1234'. Due to varying assembler lameness this
3200 gets either rejected or translated to `call .+1234'. */
3201 if (GET_CODE (op) == CONST_INT)
3202 return 0;
3203
3204 /* Explicitly allow SYMBOL_REF even if pic. */
3205 if (GET_CODE (op) == SYMBOL_REF)
3206 return 1;
3207
3208 /* Otherwise we can allow any general_operand in the address. */
3209 return general_operand (op, Pmode);
3210 }
3211
3212 int
3213 constant_call_address_operand (op, mode)
3214 rtx op;
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3216 {
3217 if (GET_CODE (op) == CONST
3218 && GET_CODE (XEXP (op, 0)) == PLUS
3219 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3220 op = XEXP (XEXP (op, 0), 0);
3221 return GET_CODE (op) == SYMBOL_REF;
3222 }
3223
3224 /* Match exactly zero and one. */
3225
3226 int
3227 const0_operand (op, mode)
3228 register rtx op;
3229 enum machine_mode mode;
3230 {
3231 return op == CONST0_RTX (mode);
3232 }
3233
3234 int
3235 const1_operand (op, mode)
3236 register rtx op;
3237 enum machine_mode mode ATTRIBUTE_UNUSED;
3238 {
3239 return op == const1_rtx;
3240 }
3241
3242 /* Match 2, 4, or 8. Used for leal multiplicands. */
3243
3244 int
3245 const248_operand (op, mode)
3246 register rtx op;
3247 enum machine_mode mode ATTRIBUTE_UNUSED;
3248 {
3249 return (GET_CODE (op) == CONST_INT
3250 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3251 }
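
/* Illustrative note (added; not part of the original sources): 2, 4 and 8
   are the scale factors usable in the scaled-index addressing mode
   (scale 1 needs no multiplier), e.g.  leal (%eax,%ebx,4), %ecx.  */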
3252
3253 /* True if this is a constant appropriate for an increment or decrement. */
3254
3255 int
3256 incdec_operand (op, mode)
3257 register rtx op;
3258 enum machine_mode mode ATTRIBUTE_UNUSED;
3259 {
3260 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3261 flags register, since the carry flag is not set. */
3262 if (TARGET_PENTIUM4 && !optimize_size)
3263 return 0;
3264 return op == const1_rtx || op == constm1_rtx;
3265 }
3266
3267 /* Return nonzero if OP is acceptable as operand of DImode shift
3268 expander. */
3269
3270 int
3271 shiftdi_operand (op, mode)
3272 rtx op;
3273 enum machine_mode mode ATTRIBUTE_UNUSED;
3274 {
3275 if (TARGET_64BIT)
3276 return nonimmediate_operand (op, mode);
3277 else
3278 return register_operand (op, mode);
3279 }
3280
3281 /* Return false if this is the stack pointer, or any other fake
3282 register eliminable to the stack pointer. Otherwise, this is
3283 a register operand.
3284
3285 This is used to prevent esp from being used as an index reg,
3286 which would only happen in pathological cases. */
3287
3288 int
3289 reg_no_sp_operand (op, mode)
3290 register rtx op;
3291 enum machine_mode mode;
3292 {
3293 rtx t = op;
3294 if (GET_CODE (t) == SUBREG)
3295 t = SUBREG_REG (t);
3296 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3297 return 0;
3298
3299 return register_operand (op, mode);
3300 }
3301
3302 int
3303 mmx_reg_operand (op, mode)
3304 register rtx op;
3305 enum machine_mode mode ATTRIBUTE_UNUSED;
3306 {
3307 return MMX_REG_P (op);
3308 }
3309
3310 /* Return false if this is any eliminable register. Otherwise
3311 general_operand. */
3312
3313 int
3314 general_no_elim_operand (op, mode)
3315 register rtx op;
3316 enum machine_mode mode;
3317 {
3318 rtx t = op;
3319 if (GET_CODE (t) == SUBREG)
3320 t = SUBREG_REG (t);
3321 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3322 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3323 || t == virtual_stack_dynamic_rtx)
3324 return 0;
3325 if (REG_P (t)
3326 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3327 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3328 return 0;
3329
3330 return general_operand (op, mode);
3331 }
3332
3333 /* Return false if this is any eliminable register. Otherwise
3334 register_operand or const_int. */
3335
3336 int
3337 nonmemory_no_elim_operand (op, mode)
3338 register rtx op;
3339 enum machine_mode mode;
3340 {
3341 rtx t = op;
3342 if (GET_CODE (t) == SUBREG)
3343 t = SUBREG_REG (t);
3344 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3345 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3346 || t == virtual_stack_dynamic_rtx)
3347 return 0;
3348
3349 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3350 }
3351
3352 /* Return false if this is any eliminable register or stack register,
3353 otherwise work like register_operand. */
3354
3355 int
3356 index_register_operand (op, mode)
3357 register rtx op;
3358 enum machine_mode mode;
3359 {
3360 rtx t = op;
3361 if (GET_CODE (t) == SUBREG)
3362 t = SUBREG_REG (t);
3363 if (!REG_P (t))
3364 return 0;
3365 if (t == arg_pointer_rtx
3366 || t == frame_pointer_rtx
3367 || t == virtual_incoming_args_rtx
3368 || t == virtual_stack_vars_rtx
3369 || t == virtual_stack_dynamic_rtx
3370 || REGNO (t) == STACK_POINTER_REGNUM)
3371 return 0;
3372
3373 return general_operand (op, mode);
3374 }
3375
3376 /* Return true if op is a Q_REGS class register. */
3377
3378 int
3379 q_regs_operand (op, mode)
3380 register rtx op;
3381 enum machine_mode mode;
3382 {
3383 if (mode != VOIDmode && GET_MODE (op) != mode)
3384 return 0;
3385 if (GET_CODE (op) == SUBREG)
3386 op = SUBREG_REG (op);
3387 return ANY_QI_REG_P (op);
3388 }
3389
3390 /* Return true if op is a NON_Q_REGS class register. */
3391
3392 int
3393 non_q_regs_operand (op, mode)
3394 register rtx op;
3395 enum machine_mode mode;
3396 {
3397 if (mode != VOIDmode && GET_MODE (op) != mode)
3398 return 0;
3399 if (GET_CODE (op) == SUBREG)
3400 op = SUBREG_REG (op);
3401 return NON_QI_REG_P (op);
3402 }
3403
3404 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3405 insns. */
3406 int
3407 sse_comparison_operator (op, mode)
3408 rtx op;
3409 enum machine_mode mode ATTRIBUTE_UNUSED;
3410 {
3411 enum rtx_code code = GET_CODE (op);
3412 switch (code)
3413 {
3414 /* Operations supported directly. */
3415 case EQ:
3416 case LT:
3417 case LE:
3418 case UNORDERED:
3419 case NE:
3420 case UNGE:
3421 case UNGT:
3422 case ORDERED:
3423 return 1;
3424 /* These are equivalent to ones above in non-IEEE comparisons. */
3425 case UNEQ:
3426 case UNLT:
3427 case UNLE:
3428 case LTGT:
3429 case GE:
3430 case GT:
3431 return !TARGET_IEEE_FP;
3432 default:
3433 return 0;
3434 }
3435 }
3436 /* Return 1 if OP is a valid comparison operator in valid mode. */
3437 int
3438 ix86_comparison_operator (op, mode)
3439 register rtx op;
3440 enum machine_mode mode;
3441 {
3442 enum machine_mode inmode;
3443 enum rtx_code code = GET_CODE (op);
3444 if (mode != VOIDmode && GET_MODE (op) != mode)
3445 return 0;
3446 if (GET_RTX_CLASS (code) != '<')
3447 return 0;
3448 inmode = GET_MODE (XEXP (op, 0));
3449
3450 if (inmode == CCFPmode || inmode == CCFPUmode)
3451 {
3452 enum rtx_code second_code, bypass_code;
3453 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3454 return (bypass_code == NIL && second_code == NIL);
3455 }
3456 switch (code)
3457 {
3458 case EQ: case NE:
3459 return 1;
3460 case LT: case GE:
3461 if (inmode == CCmode || inmode == CCGCmode
3462 || inmode == CCGOCmode || inmode == CCNOmode)
3463 return 1;
3464 return 0;
3465 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3466 if (inmode == CCmode)
3467 return 1;
3468 return 0;
3469 case GT: case LE:
3470 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3471 return 1;
3472 return 0;
3473 default:
3474 return 0;
3475 }
3476 }
3477
3478 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3479
3480 int
3481 fcmov_comparison_operator (op, mode)
3482 register rtx op;
3483 enum machine_mode mode;
3484 {
3485 enum machine_mode inmode;
3486 enum rtx_code code = GET_CODE (op);
3487 if (mode != VOIDmode && GET_MODE (op) != mode)
3488 return 0;
3489 if (GET_RTX_CLASS (code) != '<')
3490 return 0;
3491 inmode = GET_MODE (XEXP (op, 0));
3492 if (inmode == CCFPmode || inmode == CCFPUmode)
3493 {
3494 enum rtx_code second_code, bypass_code;
3495 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3496 if (bypass_code != NIL || second_code != NIL)
3497 return 0;
3498 code = ix86_fp_compare_code_to_integer (code);
3499 }
3500 /* The i387 supports just a limited set of condition codes. */
3501 switch (code)
3502 {
3503 case LTU: case GTU: case LEU: case GEU:
3504 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3505 return 1;
3506 return 0;
3507 case ORDERED: case UNORDERED:
3508 case EQ: case NE:
3509 return 1;
3510 default:
3511 return 0;
3512 }
3513 }
3514
3515 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3516
3517 int
3518 promotable_binary_operator (op, mode)
3519 register rtx op;
3520 enum machine_mode mode ATTRIBUTE_UNUSED;
3521 {
3522 switch (GET_CODE (op))
3523 {
3524 case MULT:
3525 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3526 but the 386 and 486 do HImode multiplies faster. */
3527 return ix86_cpu > PROCESSOR_I486;
3528 case PLUS:
3529 case AND:
3530 case IOR:
3531 case XOR:
3532 case ASHIFT:
3533 return 1;
3534 default:
3535 return 0;
3536 }
3537 }
3538
3539 /* Nearly general operand, but accept any const_double, since we wish
3540 to be able to drop them into memory rather than have them get pulled
3541 into registers. */
3542
3543 int
3544 cmp_fp_expander_operand (op, mode)
3545 register rtx op;
3546 enum machine_mode mode;
3547 {
3548 if (mode != VOIDmode && mode != GET_MODE (op))
3549 return 0;
3550 if (GET_CODE (op) == CONST_DOUBLE)
3551 return 1;
3552 return general_operand (op, mode);
3553 }
3554
3555 /* Match an SI or HImode register for a zero_extract. */
3556
3557 int
3558 ext_register_operand (op, mode)
3559 register rtx op;
3560 enum machine_mode mode ATTRIBUTE_UNUSED;
3561 {
3562 int regno;
3563 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3564 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3565 return 0;
3566
3567 if (!register_operand (op, VOIDmode))
3568 return 0;
3569
3570 /* Be careful to accept only registers having upper parts. */
3571 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3572 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3573 }
3574
3575 /* Return 1 if this is a valid binary floating-point operation.
3576 OP is the expression matched, and MODE is its mode. */
3577
3578 int
3579 binary_fp_operator (op, mode)
3580 register rtx op;
3581 enum machine_mode mode;
3582 {
3583 if (mode != VOIDmode && mode != GET_MODE (op))
3584 return 0;
3585
3586 switch (GET_CODE (op))
3587 {
3588 case PLUS:
3589 case MINUS:
3590 case MULT:
3591 case DIV:
3592 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3593
3594 default:
3595 return 0;
3596 }
3597 }
3598
3599 int
3600 mult_operator (op, mode)
3601 register rtx op;
3602 enum machine_mode mode ATTRIBUTE_UNUSED;
3603 {
3604 return GET_CODE (op) == MULT;
3605 }
3606
3607 int
3608 div_operator (op, mode)
3609 register rtx op;
3610 enum machine_mode mode ATTRIBUTE_UNUSED;
3611 {
3612 return GET_CODE (op) == DIV;
3613 }
3614
3615 int
3616 arith_or_logical_operator (op, mode)
3617 rtx op;
3618 enum machine_mode mode;
3619 {
3620 return ((mode == VOIDmode || GET_MODE (op) == mode)
3621 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3622 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3623 }
3624
3625 /* Returns 1 if OP is a memory operand with a displacement. */
3626
3627 int
3628 memory_displacement_operand (op, mode)
3629 register rtx op;
3630 enum machine_mode mode;
3631 {
3632 struct ix86_address parts;
3633
3634 if (! memory_operand (op, mode))
3635 return 0;
3636
3637 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3638 abort ();
3639
3640 return parts.disp != NULL_RTX;
3641 }
3642
3643 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3644 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3645
3646 ??? It seems likely that this will only work because cmpsi is an
3647 expander, and no actual insns use this. */
3648
3649 int
3650 cmpsi_operand (op, mode)
3651 rtx op;
3652 enum machine_mode mode;
3653 {
3654 if (nonimmediate_operand (op, mode))
3655 return 1;
3656
3657 if (GET_CODE (op) == AND
3658 && GET_MODE (op) == SImode
3659 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3660 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3661 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3662 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3663 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3664 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3665 return 1;
3666
3667 return 0;
3668 }
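
/* Illustrative note (not in the original source): a minimal sketch of the
   kind of RTX the second test above accepts, assuming a test of the high
   byte of a QImode-accessible register (e.g. %ah):

     (and:SI (zero_extract:SI (reg:SI x) (const_int 8) (const_int 8))
             (const_int 255))

   i.e. an 8-bit field starting at bit 8, compared against a constant.  */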
3669
3670 /* Returns 1 if OP is a memory operand whose address cannot be represented
3671 by the ModR/M byte alone (i.e. it needs additional address bytes). */
3672
3673 int
3674 long_memory_operand (op, mode)
3675 register rtx op;
3676 enum machine_mode mode;
3677 {
3678 if (! memory_operand (op, mode))
3679 return 0;
3680
3681 return memory_address_length (op) != 0;
3682 }
3683
3684 /* Return nonzero if the rtx is known aligned. */
3685
3686 int
3687 aligned_operand (op, mode)
3688 rtx op;
3689 enum machine_mode mode;
3690 {
3691 struct ix86_address parts;
3692
3693 if (!general_operand (op, mode))
3694 return 0;
3695
3696 /* Registers and immediate operands are always "aligned". */
3697 if (GET_CODE (op) != MEM)
3698 return 1;
3699
3700 /* Don't even try to do any aligned optimizations with volatiles. */
3701 if (MEM_VOLATILE_P (op))
3702 return 0;
3703
3704 op = XEXP (op, 0);
3705
3706 /* Pushes and pops are only valid on the stack pointer. */
3707 if (GET_CODE (op) == PRE_DEC
3708 || GET_CODE (op) == POST_INC)
3709 return 1;
3710
3711 /* Decode the address. */
3712 if (! ix86_decompose_address (op, &parts))
3713 abort ();
3714
3715 if (parts.base && GET_CODE (parts.base) == SUBREG)
3716 parts.base = SUBREG_REG (parts.base);
3717 if (parts.index && GET_CODE (parts.index) == SUBREG)
3718 parts.index = SUBREG_REG (parts.index);
3719
3720 /* Look for some component that isn't known to be aligned. */
3721 if (parts.index)
3722 {
3723 if (parts.scale < 4
3724 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3725 return 0;
3726 }
3727 if (parts.base)
3728 {
3729 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3730 return 0;
3731 }
3732 if (parts.disp)
3733 {
3734 if (GET_CODE (parts.disp) != CONST_INT
3735 || (INTVAL (parts.disp) & 3) != 0)
3736 return 0;
3737 }
3738
3739 /* Didn't find one -- this must be an aligned address. */
3740 return 1;
3741 }
3742 \f
3743 /* Return true if the constant is something that can be loaded with
3744 a special instruction. Only handle 0.0 and 1.0; others are less
3745 worthwhile. */
3746
3747 int
3748 standard_80387_constant_p (x)
3749 rtx x;
3750 {
3751 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3752 return -1;
3753 /* Note that the 80387 can load other constants, such as pi, that we should
3754 support too. On some machines, however, these are much slower to load as a
3755 standard constant than to load from a double in memory. */
3756 if (x == CONST0_RTX (GET_MODE (x)))
3757 return 1;
3758 if (x == CONST1_RTX (GET_MODE (x)))
3759 return 2;
3760 return 0;
3761 }
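
/* Illustrative note (not in the original source): the return value above is
   what lets the floating-point move patterns pick a special load; roughly,
   a result of 1 corresponds to emitting "fldz" (push +0.0) and a result of
   2 to "fld1" (push +1.0), while any other constant is loaded from memory.  */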
3762
3763 /* Return 1 if X is an FP constant we can load into an SSE register
3764 without using memory. */
3765 int
3766 standard_sse_constant_p (x)
3767 rtx x;
3768 {
3769 if (x == const0_rtx)
3770 return 1;
3771 return (x == CONST0_RTX (GET_MODE (x)));
3772 }
3773
3774 /* Returns 1 if OP contains a symbol reference */
3775
3776 int
3777 symbolic_reference_mentioned_p (op)
3778 rtx op;
3779 {
3780 register const char *fmt;
3781 register int i;
3782
3783 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3784 return 1;
3785
3786 fmt = GET_RTX_FORMAT (GET_CODE (op));
3787 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3788 {
3789 if (fmt[i] == 'E')
3790 {
3791 register int j;
3792
3793 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3794 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3795 return 1;
3796 }
3797
3798 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3799 return 1;
3800 }
3801
3802 return 0;
3803 }
3804
3805 /* Return 1 if it is appropriate to emit `ret' instructions in the
3806 body of a function. Do this only if the epilogue is simple, needing a
3807 couple of insns. Prior to reloading, we can't tell how many registers
3808 must be saved, so return 0 then. Return 0 if there is no frame
3809 marker to de-allocate.
3810
3811 If NON_SAVING_SETJMP is defined and true, then it is not possible
3812 for the epilogue to be simple, so return 0. This is a special case
3813 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3814 until final, but jump_optimize may need to know sooner if a
3815 `return' is OK. */
3816
3817 int
3818 ix86_can_use_return_insn_p ()
3819 {
3820 struct ix86_frame frame;
3821
3822 #ifdef NON_SAVING_SETJMP
3823 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3824 return 0;
3825 #endif
3826
3827 if (! reload_completed || frame_pointer_needed)
3828 return 0;
3829
3830 /* Don't allow more than 32k bytes of arguments to be popped, since
3831 that's all we can do with one instruction. */
3832 if (current_function_pops_args
3833 && current_function_args_size >= 32768)
3834 return 0;
3835
3836 ix86_compute_frame_layout (&frame);
3837 return frame.to_allocate == 0 && frame.nregs == 0;
3838 }
3839 \f
3840 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3841 int
3842 x86_64_sign_extended_value (value)
3843 rtx value;
3844 {
3845 switch (GET_CODE (value))
3846 {
3847 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3848 to be at least 32, and thus all acceptable constants are
3849 represented as CONST_INT. */
3850 case CONST_INT:
3851 if (HOST_BITS_PER_WIDE_INT == 32)
3852 return 1;
3853 else
3854 {
3855 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3856 return trunc_int_for_mode (val, SImode) == val;
3857 }
3858 break;
3859
3860 /* For certain code models, the symbolic references are known to fit.
3861 In the CM_SMALL_PIC model we know they fit if they are local to the
3862 shared library. Don't count TLS SYMBOL_REFs here, since they should
3863 fit only when inside an UNSPEC handled below. */
3864 case SYMBOL_REF:
3865 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
3866
3867 /* For certain code models, the code is near as well. */
3868 case LABEL_REF:
3869 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
3870 || ix86_cmodel == CM_KERNEL);
3871
3872 /* We also may accept the offsetted memory references in certain special
3873 cases. */
3874 case CONST:
3875 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
3876 switch (XINT (XEXP (value, 0), 1))
3877 {
3878 case UNSPEC_GOTPCREL:
3879 case UNSPEC_DTPOFF:
3880 case UNSPEC_GOTNTPOFF:
3881 case UNSPEC_NTPOFF:
3882 return 1;
3883 default:
3884 break;
3885 }
3886 if (GET_CODE (XEXP (value, 0)) == PLUS)
3887 {
3888 rtx op1 = XEXP (XEXP (value, 0), 0);
3889 rtx op2 = XEXP (XEXP (value, 0), 1);
3890 HOST_WIDE_INT offset;
3891
3892 if (ix86_cmodel == CM_LARGE)
3893 return 0;
3894 if (GET_CODE (op2) != CONST_INT)
3895 return 0;
3896 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3897 switch (GET_CODE (op1))
3898 {
3899 case SYMBOL_REF:
3900 /* For CM_SMALL assume that the last object ends at least 16MB
3901 before the 31-bit boundary. We may also accept fairly large
3902 negative constants, knowing that all objects are in the
3903 positive half of the address space. */
3904 if (ix86_cmodel == CM_SMALL
3905 && offset < 16*1024*1024
3906 && trunc_int_for_mode (offset, SImode) == offset)
3907 return 1;
3908 /* For CM_KERNEL we know that all objects reside in the
3909 negative half of the 32-bit address space. We may not
3910 accept negative offsets, since they may push an address out
3911 of range, but we may accept fairly large positive ones. */
3912 if (ix86_cmodel == CM_KERNEL
3913 && offset > 0
3914 && trunc_int_for_mode (offset, SImode) == offset)
3915 return 1;
3916 break;
3917 case LABEL_REF:
3918 /* These conditions are similar to SYMBOL_REF ones, just the
3919 constraints for code models differ. */
3920 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3921 && offset < 16*1024*1024
3922 && trunc_int_for_mode (offset, SImode) == offset)
3923 return 1;
3924 if (ix86_cmodel == CM_KERNEL
3925 && offset > 0
3926 && trunc_int_for_mode (offset, SImode) == offset)
3927 return 1;
3928 break;
3929 case UNSPEC:
3930 switch (XINT (op1, 1))
3931 {
3932 case UNSPEC_DTPOFF:
3933 case UNSPEC_NTPOFF:
3934 if (offset > 0
3935 && trunc_int_for_mode (offset, SImode) == offset)
3936 return 1;
3937 }
3938 break;
3939 default:
3940 return 0;
3941 }
3942 }
3943 return 0;
3944 default:
3945 return 0;
3946 }
3947 }
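
/* Illustrative examples (not in the original source), under the checks above:
   (const_int -123) fits, since it is already a sign-extended 32-bit value;
   (const_int 0x80000000) does not, because sign-extending its low 32 bits
   changes the value; a plain (symbol_ref "foo") fits only for the small and
   kernel code models.  */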
3948
3949 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3950 int
3951 x86_64_zero_extended_value (value)
3952 rtx value;
3953 {
3954 switch (GET_CODE (value))
3955 {
3956 case CONST_DOUBLE:
3957 if (HOST_BITS_PER_WIDE_INT == 32)
3958 return (GET_MODE (value) == VOIDmode
3959 && !CONST_DOUBLE_HIGH (value));
3960 else
3961 return 0;
3962 case CONST_INT:
3963 if (HOST_BITS_PER_WIDE_INT == 32)
3964 return INTVAL (value) >= 0;
3965 else
3966 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3967 break;
3968
3969 /* For certain code models, the symbolic references are known to fit. */
3970 case SYMBOL_REF:
3971 return ix86_cmodel == CM_SMALL;
3972
3973 /* For certain code models, the code is near as well. */
3974 case LABEL_REF:
3975 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3976
3977 /* We also may accept the offsetted memory references in certain special
3978 cases. */
3979 case CONST:
3980 if (GET_CODE (XEXP (value, 0)) == PLUS)
3981 {
3982 rtx op1 = XEXP (XEXP (value, 0), 0);
3983 rtx op2 = XEXP (XEXP (value, 0), 1);
3984
3985 if (ix86_cmodel == CM_LARGE)
3986 return 0;
3987 switch (GET_CODE (op1))
3988 {
3989 case SYMBOL_REF:
3990 return 0;
3991 /* For the small code model we may accept fairly large positive
3992 offsets, since one bit is available for free. Negative
3993 offsets are limited by the size of the NULL pointer area
3994 specified by the ABI. */
3995 if (ix86_cmodel == CM_SMALL
3996 && GET_CODE (op2) == CONST_INT
3997 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3998 && (trunc_int_for_mode (INTVAL (op2), SImode)
3999 == INTVAL (op2)))
4000 return 1;
4001 /* ??? For the kernel, we may accept adjustment of
4002 -0x10000000, since we know that it will just convert
4003 negative address space to positive, but perhaps this
4004 is not worthwhile. */
4005 break;
4006 case LABEL_REF:
4007 /* These conditions are similar to SYMBOL_REF ones, just the
4008 constraints for code models differ. */
4009 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4010 && GET_CODE (op2) == CONST_INT
4011 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4012 && (trunc_int_for_mode (INTVAL (op2), SImode)
4013 == INTVAL (op2)))
4014 return 1;
4015 break;
4016 default:
4017 return 0;
4018 }
4019 }
4020 return 0;
4021 default:
4022 return 0;
4023 }
4024 }
4025
4026 /* Value should be nonzero if functions must have frame pointers.
4027 Zero means the frame pointer need not be set up (and parms may
4028 be accessed via the stack pointer) in functions that seem suitable. */
4029
4030 int
4031 ix86_frame_pointer_required ()
4032 {
4033 /* If we accessed previous frames, then the generated code expects
4034 to be able to access the saved ebp value in our frame. */
4035 if (cfun->machine->accesses_prev_frame)
4036 return 1;
4037
4038 /* Several x86 OSes need a frame pointer for other reasons,
4039 usually pertaining to setjmp. */
4040 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4041 return 1;
4042
4043 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4044 the frame pointer by default. Turn it back on now if we've not
4045 got a leaf function. */
4046 if (TARGET_OMIT_LEAF_FRAME_POINTER
4047 && (!current_function_is_leaf))
4048 return 1;
4049
4050 if (current_function_profile)
4051 return 1;
4052
4053 return 0;
4054 }
4055
4056 /* Record that the current function accesses previous call frames. */
4057
4058 void
4059 ix86_setup_frame_addresses ()
4060 {
4061 cfun->machine->accesses_prev_frame = 1;
4062 }
4063 \f
4064 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4065 # define USE_HIDDEN_LINKONCE 1
4066 #else
4067 # define USE_HIDDEN_LINKONCE 0
4068 #endif
4069
4070 static int pic_labels_used;
4071
4072 /* Fills in the label name that should be used for a pc thunk for
4073 the given register. */
4074
4075 static void
4076 get_pc_thunk_name (name, regno)
4077 char name[32];
4078 unsigned int regno;
4079 {
4080 if (USE_HIDDEN_LINKONCE)
4081 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4082 else
4083 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4084 }
4085
4086
4087 /* For -fpic, emit the pc thunks that were used: each one loads its
4088 register with the return address of the caller and then returns. */
4089
4090 void
4091 ix86_asm_file_end (file)
4092 FILE *file;
4093 {
4094 rtx xops[2];
4095 int regno;
4096
4097 for (regno = 0; regno < 8; ++regno)
4098 {
4099 char name[32];
4100
4101 if (! ((pic_labels_used >> regno) & 1))
4102 continue;
4103
4104 get_pc_thunk_name (name, regno);
4105
4106 if (USE_HIDDEN_LINKONCE)
4107 {
4108 tree decl;
4109
4110 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4111 error_mark_node);
4112 TREE_PUBLIC (decl) = 1;
4113 TREE_STATIC (decl) = 1;
4114 DECL_ONE_ONLY (decl) = 1;
4115
4116 (*targetm.asm_out.unique_section) (decl, 0);
4117 named_section (decl, NULL, 0);
4118
4119 (*targetm.asm_out.globalize_label) (file, name);
4120 fputs ("\t.hidden\t", file);
4121 assemble_name (file, name);
4122 fputc ('\n', file);
4123 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4124 }
4125 else
4126 {
4127 text_section ();
4128 ASM_OUTPUT_LABEL (file, name);
4129 }
4130
4131 xops[0] = gen_rtx_REG (SImode, regno);
4132 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4133 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4134 output_asm_insn ("ret", xops);
4135 }
4136 }
4137
4138 /* Emit code for the SET_GOT patterns. */
4139
4140 const char *
4141 output_set_got (dest)
4142 rtx dest;
4143 {
4144 rtx xops[3];
4145
4146 xops[0] = dest;
4147 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4148
4149 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4150 {
4151 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4152
4153 if (!flag_pic)
4154 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4155 else
4156 output_asm_insn ("call\t%a2", xops);
4157
4158 #if TARGET_MACHO
4159 /* Output the "canonical" label name ("Lxx$pb") here too. This
4160 is what will be referred to by the Mach-O PIC subsystem. */
4161 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4162 #endif
4163 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4164 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4165
4166 if (flag_pic)
4167 output_asm_insn ("pop{l}\t%0", xops);
4168 }
4169 else
4170 {
4171 char name[32];
4172 get_pc_thunk_name (name, REGNO (dest));
4173 pic_labels_used |= 1 << REGNO (dest);
4174
4175 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4176 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4177 output_asm_insn ("call\t%X2", xops);
4178 }
4179
4180 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4181 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4182 else if (!TARGET_MACHO)
4183 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4184
4185 return "";
4186 }
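
/* Illustrative note (not in the original source): for a 32-bit -fpic build
   the sequences emitted above look roughly like this, assuming %ebx is the
   PIC register:

     ; without deep branch prediction:
     call  .L2
   .L2:
     popl  %ebx
     addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

     ; with deep branch prediction (pc thunk):
     call  __i686.get_pc_thunk.bx
     addl  $_GLOBAL_OFFSET_TABLE_, %ebx
   */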
4187
4188 /* Generate a "push" pattern for input ARG. */
4189
4190 static rtx
4191 gen_push (arg)
4192 rtx arg;
4193 {
4194 return gen_rtx_SET (VOIDmode,
4195 gen_rtx_MEM (Pmode,
4196 gen_rtx_PRE_DEC (Pmode,
4197 stack_pointer_rtx)),
4198 arg);
4199 }
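
/* Illustrative note (not in the original source): the RTL built above has
   roughly this shape (Pmode is SImode here; on 64-bit targets it is DImode):

     (set (mem:SI (pre_dec:SI (reg:SI sp)))
          (reg:SI bx))

   which the move patterns emit as a single "pushl %ebx".  */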
4200
4201 /* Return >= 0 if there is an unused call-clobbered register available
4202 for the entire function. */
4203
4204 static unsigned int
4205 ix86_select_alt_pic_regnum ()
4206 {
4207 if (current_function_is_leaf && !current_function_profile)
4208 {
4209 int i;
4210 for (i = 2; i >= 0; --i)
4211 if (!regs_ever_live[i])
4212 return i;
4213 }
4214
4215 return INVALID_REGNUM;
4216 }
4217
4218 /* Return 1 if we need to save REGNO. */
4219 static int
4220 ix86_save_reg (regno, maybe_eh_return)
4221 unsigned int regno;
4222 int maybe_eh_return;
4223 {
4224 if (pic_offset_table_rtx
4225 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4226 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4227 || current_function_profile
4228 || current_function_calls_eh_return))
4229 {
4230 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4231 return 0;
4232 return 1;
4233 }
4234
4235 if (current_function_calls_eh_return && maybe_eh_return)
4236 {
4237 unsigned i;
4238 for (i = 0; ; i++)
4239 {
4240 unsigned test = EH_RETURN_DATA_REGNO (i);
4241 if (test == INVALID_REGNUM)
4242 break;
4243 if (test == regno)
4244 return 1;
4245 }
4246 }
4247
4248 return (regs_ever_live[regno]
4249 && !call_used_regs[regno]
4250 && !fixed_regs[regno]
4251 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4252 }
4253
4254 /* Return number of registers to be saved on the stack. */
4255
4256 static int
4257 ix86_nsaved_regs ()
4258 {
4259 int nregs = 0;
4260 int regno;
4261
4262 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4263 if (ix86_save_reg (regno, true))
4264 nregs++;
4265 return nregs;
4266 }
4267
4268 /* Return the offset between two registers, one to be eliminated, and the other
4269 its replacement, at the start of a routine. */
4270
4271 HOST_WIDE_INT
4272 ix86_initial_elimination_offset (from, to)
4273 int from;
4274 int to;
4275 {
4276 struct ix86_frame frame;
4277 ix86_compute_frame_layout (&frame);
4278
4279 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4280 return frame.hard_frame_pointer_offset;
4281 else if (from == FRAME_POINTER_REGNUM
4282 && to == HARD_FRAME_POINTER_REGNUM)
4283 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4284 else
4285 {
4286 if (to != STACK_POINTER_REGNUM)
4287 abort ();
4288 else if (from == ARG_POINTER_REGNUM)
4289 return frame.stack_pointer_offset;
4290 else if (from != FRAME_POINTER_REGNUM)
4291 abort ();
4292 else
4293 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4294 }
4295 }
4296
4297 /* Fill structure ix86_frame about frame of currently computed function. */
4298
4299 static void
4300 ix86_compute_frame_layout (frame)
4301 struct ix86_frame *frame;
4302 {
4303 HOST_WIDE_INT total_size;
4304 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4305 int offset;
4306 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4307 HOST_WIDE_INT size = get_frame_size ();
4308
4309 frame->nregs = ix86_nsaved_regs ();
4310 total_size = size;
4311
4312 /* Skip return address and saved base pointer. */
4313 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4314
4315 frame->hard_frame_pointer_offset = offset;
4316
4317 /* Do some sanity checking of stack_alignment_needed and
4318 preferred_alignment, since the i386 port is the only one using
4319 these features, and they may break easily. */
4320
4321 if (size && !stack_alignment_needed)
4322 abort ();
4323 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4324 abort ();
4325 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4326 abort ();
4327 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4328 abort ();
4329
4330 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4331 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4332
4333 /* Register save area */
4334 offset += frame->nregs * UNITS_PER_WORD;
4335
4336 /* Va-arg area */
4337 if (ix86_save_varrargs_registers)
4338 {
4339 offset += X86_64_VARARGS_SIZE;
4340 frame->va_arg_size = X86_64_VARARGS_SIZE;
4341 }
4342 else
4343 frame->va_arg_size = 0;
4344
4345 /* Align the start of the frame for the function's locals. */
4346 frame->padding1 = ((offset + stack_alignment_needed - 1)
4347 & -stack_alignment_needed) - offset;
4348
4349 offset += frame->padding1;
4350
4351 /* Frame pointer points here. */
4352 frame->frame_pointer_offset = offset;
4353
4354 offset += size;
4355
4356 /* Add outgoing arguments area. Can be skipped if we eliminated
4357 all the function calls as dead code. */
4358 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4359 {
4360 offset += current_function_outgoing_args_size;
4361 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4362 }
4363 else
4364 frame->outgoing_arguments_size = 0;
4365
4366 /* Align stack boundary. Only needed if we're calling another function
4367 or using alloca. */
4368 if (!current_function_is_leaf || current_function_calls_alloca)
4369 frame->padding2 = ((offset + preferred_alignment - 1)
4370 & -preferred_alignment) - offset;
4371 else
4372 frame->padding2 = 0;
4373
4374 offset += frame->padding2;
4375
4376 /* We've reached end of stack frame. */
4377 frame->stack_pointer_offset = offset;
4378
4379 /* The amount of stack the prologue needs to allocate. */
4380 frame->to_allocate =
4381 (size + frame->padding1 + frame->padding2
4382 + frame->outgoing_arguments_size + frame->va_arg_size);
4383
4384 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4385 && current_function_is_leaf)
4386 {
4387 frame->red_zone_size = frame->to_allocate;
4388 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4389 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4390 }
4391 else
4392 frame->red_zone_size = 0;
4393 frame->to_allocate -= frame->red_zone_size;
4394 frame->stack_pointer_offset -= frame->red_zone_size;
4395 #if 0
4396 fprintf (stderr, "nregs: %i\n", frame->nregs);
4397 fprintf (stderr, "size: %i\n", size);
4398 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4399 fprintf (stderr, "padding1: %i\n", frame->padding1);
4400 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4401 fprintf (stderr, "padding2: %i\n", frame->padding2);
4402 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4403 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4404 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4405 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4406 frame->hard_frame_pointer_offset);
4407 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4408 #endif
4409 }
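
/* Illustrative note (not in the original source): a rough sketch of the
   layout computed above, with higher addresses at the top and all offsets
   measured downward from the (virtual) argument pointer:

     incoming arguments
     return address
     saved %ebp (if frame_pointer_needed)   <- hard_frame_pointer_offset
     saved registers (frame->nregs words)
     va-arg register save area
     padding1 (aligns the locals)           <- frame_pointer_offset
     local variables (get_frame_size ())
     outgoing argument area
     padding2 (aligns the stack boundary)   <- stack_pointer_offset

   to_allocate is the part below the saved registers that the prologue must
   subtract from the stack pointer (less any x86-64 red zone).  */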
4410
4411 /* Emit code to save registers in the prologue. */
4412
4413 static void
4414 ix86_emit_save_regs ()
4415 {
4416 register int regno;
4417 rtx insn;
4418
4419 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4420 if (ix86_save_reg (regno, true))
4421 {
4422 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4423 RTX_FRAME_RELATED_P (insn) = 1;
4424 }
4425 }
4426
4427 /* Emit code to save registers using MOV insns. The first register
4428 is stored at POINTER + OFFSET. */
4429 static void
4430 ix86_emit_save_regs_using_mov (pointer, offset)
4431 rtx pointer;
4432 HOST_WIDE_INT offset;
4433 {
4434 int regno;
4435 rtx insn;
4436
4437 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4438 if (ix86_save_reg (regno, true))
4439 {
4440 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4441 Pmode, offset),
4442 gen_rtx_REG (Pmode, regno));
4443 RTX_FRAME_RELATED_P (insn) = 1;
4444 offset += UNITS_PER_WORD;
4445 }
4446 }
4447
4448 /* Expand the prologue into a bunch of separate insns. */
4449
4450 void
4451 ix86_expand_prologue ()
4452 {
4453 rtx insn;
4454 bool pic_reg_used;
4455 struct ix86_frame frame;
4456 int use_mov = 0;
4457 HOST_WIDE_INT allocate;
4458
4459 if (!optimize_size)
4460 {
4461 use_fast_prologue_epilogue
4462 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4463 if (TARGET_PROLOGUE_USING_MOVE)
4464 use_mov = use_fast_prologue_epilogue;
4465 }
4466 ix86_compute_frame_layout (&frame);
4467
4468 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4469 slower on all targets. Also sdb doesn't like it. */
4470
4471 if (frame_pointer_needed)
4472 {
4473 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4474 RTX_FRAME_RELATED_P (insn) = 1;
4475
4476 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4477 RTX_FRAME_RELATED_P (insn) = 1;
4478 }
4479
4480 allocate = frame.to_allocate;
4481 /* In case we are dealing with only a single register and an empty frame,
4482 a push is equivalent to the mov+add sequence. */
4483 if (allocate == 0 && frame.nregs <= 1)
4484 use_mov = 0;
4485
4486 if (!use_mov)
4487 ix86_emit_save_regs ();
4488 else
4489 allocate += frame.nregs * UNITS_PER_WORD;
4490
4491 if (allocate == 0)
4492 ;
4493 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4494 {
4495 insn = emit_insn (gen_pro_epilogue_adjust_stack
4496 (stack_pointer_rtx, stack_pointer_rtx,
4497 GEN_INT (-allocate)));
4498 RTX_FRAME_RELATED_P (insn) = 1;
4499 }
4500 else
4501 {
4502 /* ??? Is this only valid for Win32? */
4503
4504 rtx arg0, sym;
4505
4506 if (TARGET_64BIT)
4507 abort ();
4508
4509 arg0 = gen_rtx_REG (SImode, 0);
4510 emit_move_insn (arg0, GEN_INT (allocate));
4511
4512 sym = gen_rtx_MEM (FUNCTION_MODE,
4513 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4514 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4515
4516 CALL_INSN_FUNCTION_USAGE (insn)
4517 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4518 CALL_INSN_FUNCTION_USAGE (insn));
4519 }
4520 if (use_mov)
4521 {
4522 if (!frame_pointer_needed || !frame.to_allocate)
4523 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4524 else
4525 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4526 -frame.nregs * UNITS_PER_WORD);
4527 }
4528
4529 #ifdef SUBTARGET_PROLOGUE
4530 SUBTARGET_PROLOGUE;
4531 #endif
4532
4533 pic_reg_used = false;
4534 if (pic_offset_table_rtx
4535 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4536 || current_function_profile))
4537 {
4538 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4539
4540 if (alt_pic_reg_used != INVALID_REGNUM)
4541 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4542
4543 pic_reg_used = true;
4544 }
4545
4546 if (pic_reg_used)
4547 {
4548 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4549
4550 /* Even with accurate pre-reload life analysis, we can wind up
4551 deleting all references to the pic register after reload.
4552 Consider the case where cross-jumping unifies the two sides of a
4553 branch controlled by a comparison against the only read from a
4554 global. In that case, allow the set_got to be deleted, though we're
4555 too late to do anything about the ebx save in the prologue. */
4556 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4557 }
4558
4559 /* Prevent function calls from being scheduled before the call to mcount.
4560 In the pic_reg_used case, make sure that the got load isn't deleted. */
4561 if (current_function_profile)
4562 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4563 }
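
/* Illustrative note (not in the original source): a rough sketch of the
   typical 32-bit output of the expansion above, assuming a frame pointer is
   needed, the push scheme is used for the saved registers, and no stack
   probing is required:

     pushl %ebp
     movl  %esp, %ebp
     pushl %ebx              ; one push per saved register
     subl  $N, %esp          ; N = frame.to_allocate
   */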
4564
4565 /* Emit code to restore saved registers using MOV insns. First register
4566 is restored from POINTER + OFFSET. */
4567 static void
4568 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4569 rtx pointer;
4570 int offset;
4571 int maybe_eh_return;
4572 {
4573 int regno;
4574
4575 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4576 if (ix86_save_reg (regno, maybe_eh_return))
4577 {
4578 emit_move_insn (gen_rtx_REG (Pmode, regno),
4579 adjust_address (gen_rtx_MEM (Pmode, pointer),
4580 Pmode, offset));
4581 offset += UNITS_PER_WORD;
4582 }
4583 }
4584
4585 /* Restore function stack, frame, and registers. */
4586
4587 void
4588 ix86_expand_epilogue (style)
4589 int style;
4590 {
4591 int regno;
4592 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4593 struct ix86_frame frame;
4594 HOST_WIDE_INT offset;
4595
4596 ix86_compute_frame_layout (&frame);
4597
4598 /* Calculate start of saved registers relative to ebp. Special care
4599 must be taken for the normal return case of a function using
4600 eh_return: the eax and edx registers are marked as saved, but not
4601 restored along this path. */
4602 offset = frame.nregs;
4603 if (current_function_calls_eh_return && style != 2)
4604 offset -= 2;
4605 offset *= -UNITS_PER_WORD;
4606
4607 /* If we're only restoring one register and sp is not valid, then use a
4608 move instruction to restore the register, since it's less work than
4609 reloading sp and popping the register.
4610 
4611 The default code results in a stack adjustment using an add/lea
4612 instruction, while this code results in a LEAVE instruction (or its
4613 discrete equivalent), so it is profitable in some other cases as well,
4614 especially when there are no registers to restore. We also use this
4615 code when TARGET_USE_LEAVE is set and there is exactly one register to
4616 pop. This heuristic may need some tuning in the future. */
4617 if ((!sp_valid && frame.nregs <= 1)
4618 || (TARGET_EPILOGUE_USING_MOVE
4619 && use_fast_prologue_epilogue
4620 && (frame.nregs > 1 || frame.to_allocate))
4621 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4622 || (frame_pointer_needed && TARGET_USE_LEAVE
4623 && use_fast_prologue_epilogue && frame.nregs == 1)
4624 || current_function_calls_eh_return)
4625 {
4626 /* Restore registers. We can use ebp or esp to address the memory
4627 locations. If both are available, default to ebp, since offsets
4628 are known to be small. The only exception is when esp points
4629 directly to the end of the block of saved registers, where we may
4630 simplify the addressing mode. */
4631
4632 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4633 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4634 frame.to_allocate, style == 2);
4635 else
4636 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4637 offset, style == 2);
4638
4639 /* eh_return epilogues need %ecx added to the stack pointer. */
4640 if (style == 2)
4641 {
4642 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4643
4644 if (frame_pointer_needed)
4645 {
4646 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4647 tmp = plus_constant (tmp, UNITS_PER_WORD);
4648 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4649
4650 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4651 emit_move_insn (hard_frame_pointer_rtx, tmp);
4652
4653 emit_insn (gen_pro_epilogue_adjust_stack
4654 (stack_pointer_rtx, sa, const0_rtx));
4655 }
4656 else
4657 {
4658 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4659 tmp = plus_constant (tmp, (frame.to_allocate
4660 + frame.nregs * UNITS_PER_WORD));
4661 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4662 }
4663 }
4664 else if (!frame_pointer_needed)
4665 emit_insn (gen_pro_epilogue_adjust_stack
4666 (stack_pointer_rtx, stack_pointer_rtx,
4667 GEN_INT (frame.to_allocate
4668 + frame.nregs * UNITS_PER_WORD)));
4669 /* If not an i386, mov & pop is faster than "leave". */
4670 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4671 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4672 else
4673 {
4674 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4675 hard_frame_pointer_rtx,
4676 const0_rtx));
4677 if (TARGET_64BIT)
4678 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4679 else
4680 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4681 }
4682 }
4683 else
4684 {
4685 /* First step is to deallocate the stack frame so that we can
4686 pop the registers. */
4687 if (!sp_valid)
4688 {
4689 if (!frame_pointer_needed)
4690 abort ();
4691 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4692 hard_frame_pointer_rtx,
4693 GEN_INT (offset)));
4694 }
4695 else if (frame.to_allocate)
4696 emit_insn (gen_pro_epilogue_adjust_stack
4697 (stack_pointer_rtx, stack_pointer_rtx,
4698 GEN_INT (frame.to_allocate)));
4699
4700 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4701 if (ix86_save_reg (regno, false))
4702 {
4703 if (TARGET_64BIT)
4704 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4705 else
4706 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4707 }
4708 if (frame_pointer_needed)
4709 {
4710 /* LEAVE results in shorter dependency chains on CPUs that are
4711 able to execute it fast. */
4712 if (TARGET_USE_LEAVE)
4713 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4714 else if (TARGET_64BIT)
4715 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4716 else
4717 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4718 }
4719 }
4720
4721 /* Sibcall epilogues don't want a return instruction. */
4722 if (style == 0)
4723 return;
4724
4725 if (current_function_pops_args && current_function_args_size)
4726 {
4727 rtx popc = GEN_INT (current_function_pops_args);
4728
4729 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
4730 return address, do an explicit add, and jump indirectly to the
4731 caller. */
4732
4733 if (current_function_pops_args >= 65536)
4734 {
4735 rtx ecx = gen_rtx_REG (SImode, 2);
4736
4737 /* There is no "pascal" calling convention in the 64-bit ABI. */
4738 if (TARGET_64BIT)
4739 abort ();
4740
4741 emit_insn (gen_popsi1 (ecx));
4742 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4743 emit_jump_insn (gen_return_indirect_internal (ecx));
4744 }
4745 else
4746 emit_jump_insn (gen_return_pop_internal (popc));
4747 }
4748 else
4749 emit_jump_insn (gen_return_internal ());
4750 }
4751
4752 /* Reset from the function's potential modifications. */
4753
4754 static void
4755 ix86_output_function_epilogue (file, size)
4756 FILE *file ATTRIBUTE_UNUSED;
4757 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4758 {
4759 if (pic_offset_table_rtx)
4760 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4761 }
4762 \f
4763 /* Extract the parts of an RTL expression that is a valid memory address
4764 for an instruction. Return 0 if the structure of the address is
4765 grossly off. Return -1 if the address contains ASHIFT, so it is not
4766 strictly valid but is still used for computing the length of a lea
4767 instruction. */
4768
4769 static int
4770 ix86_decompose_address (addr, out)
4771 register rtx addr;
4772 struct ix86_address *out;
4773 {
4774 rtx base = NULL_RTX;
4775 rtx index = NULL_RTX;
4776 rtx disp = NULL_RTX;
4777 HOST_WIDE_INT scale = 1;
4778 rtx scale_rtx = NULL_RTX;
4779 int retval = 1;
4780
4781 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4782 base = addr;
4783 else if (GET_CODE (addr) == PLUS)
4784 {
4785 rtx op0 = XEXP (addr, 0);
4786 rtx op1 = XEXP (addr, 1);
4787 enum rtx_code code0 = GET_CODE (op0);
4788 enum rtx_code code1 = GET_CODE (op1);
4789
4790 if (code0 == REG || code0 == SUBREG)
4791 {
4792 if (code1 == REG || code1 == SUBREG)
4793 index = op0, base = op1; /* index + base */
4794 else
4795 base = op0, disp = op1; /* base + displacement */
4796 }
4797 else if (code0 == MULT)
4798 {
4799 index = XEXP (op0, 0);
4800 scale_rtx = XEXP (op0, 1);
4801 if (code1 == REG || code1 == SUBREG)
4802 base = op1; /* index*scale + base */
4803 else
4804 disp = op1; /* index*scale + disp */
4805 }
4806 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4807 {
4808 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4809 scale_rtx = XEXP (XEXP (op0, 0), 1);
4810 base = XEXP (op0, 1);
4811 disp = op1;
4812 }
4813 else if (code0 == PLUS)
4814 {
4815 index = XEXP (op0, 0); /* index + base + disp */
4816 base = XEXP (op0, 1);
4817 disp = op1;
4818 }
4819 else
4820 return 0;
4821 }
4822 else if (GET_CODE (addr) == MULT)
4823 {
4824 index = XEXP (addr, 0); /* index*scale */
4825 scale_rtx = XEXP (addr, 1);
4826 }
4827 else if (GET_CODE (addr) == ASHIFT)
4828 {
4829 rtx tmp;
4830
4831 /* We're called for lea too, which implements ashift on occasion. */
4832 index = XEXP (addr, 0);
4833 tmp = XEXP (addr, 1);
4834 if (GET_CODE (tmp) != CONST_INT)
4835 return 0;
4836 scale = INTVAL (tmp);
4837 if ((unsigned HOST_WIDE_INT) scale > 3)
4838 return 0;
4839 scale = 1 << scale;
4840 retval = -1;
4841 }
4842 else
4843 disp = addr; /* displacement */
4844
4845 /* Extract the integral value of scale. */
4846 if (scale_rtx)
4847 {
4848 if (GET_CODE (scale_rtx) != CONST_INT)
4849 return 0;
4850 scale = INTVAL (scale_rtx);
4851 }
4852
4853 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
4854 if (base && index && scale == 1
4855 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4856 || index == stack_pointer_rtx))
4857 {
4858 rtx tmp = base;
4859 base = index;
4860 index = tmp;
4861 }
4862
4863 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4864 if ((base == hard_frame_pointer_rtx
4865 || base == frame_pointer_rtx
4866 || base == arg_pointer_rtx) && !disp)
4867 disp = const0_rtx;
4868
4869 /* Special case: on the K6, [%esi] causes the instruction to be vector
4870 decoded. Avoid this by transforming it to [%esi+0]. */
4871 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4872 && base && !index && !disp
4873 && REG_P (base)
4874 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4875 disp = const0_rtx;
4876
4877 /* Special case: encode reg+reg instead of reg*2. */
4878 if (!base && index && scale && scale == 2)
4879 base = index, scale = 1;
4880
4881 /* Special case: scaling cannot be encoded without base or displacement. */
4882 if (!base && !disp && index && scale != 1)
4883 disp = const0_rtx;
4884
4885 out->base = base;
4886 out->index = index;
4887 out->disp = disp;
4888 out->scale = scale;
4889
4890 return retval;
4891 }
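
/* Illustrative example (not in the original source): given the address

     (plus:SI (plus:SI (mult:SI (reg:SI a) (const_int 4)) (reg:SI b))
              (const_int 12))

   the decomposition above yields base = b, index = a, scale = 4 and
   disp = 12, i.e. the operand 12(%b,%a,4) in AT&T syntax.  */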
4892 \f
4893 /* Return the cost of the memory address X.
4894 For the i386, it is better to use a complex address than to let gcc
4895 copy the address into a reg and make a new pseudo, but not if the
4896 address requires two regs; that would mean more pseudos with longer
4897 lifetimes. */
4898 int
4899 ix86_address_cost (x)
4900 rtx x;
4901 {
4902 struct ix86_address parts;
4903 int cost = 1;
4904
4905 if (!ix86_decompose_address (x, &parts))
4906 abort ();
4907
4908 if (parts.base && GET_CODE (parts.base) == SUBREG)
4909 parts.base = SUBREG_REG (parts.base);
4910 if (parts.index && GET_CODE (parts.index) == SUBREG)
4911 parts.index = SUBREG_REG (parts.index);
4912
4913 /* More complex memory references are better. */
4914 if (parts.disp && parts.disp != const0_rtx)
4915 cost--;
4916
4917 /* Attempt to minimize number of registers in the address. */
4918 if ((parts.base
4919 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4920 || (parts.index
4921 && (!REG_P (parts.index)
4922 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4923 cost++;
4924
4925 if (parts.base
4926 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4927 && parts.index
4928 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4929 && parts.base != parts.index)
4930 cost++;
4931
4932 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4933 since its predecode logic can't detect the length of such instructions
4934 and decoding degenerates to vector decoding. Increase the cost of such
4935 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4936 to split such addresses or even to refuse them entirely.
4937 
4938 The following addressing modes are affected:
4939 [base+scale*index]
4940 [scale*index+disp]
4941 [base+index]
4942 
4943 The first and last cases may be avoidable by explicitly coding a zero
4944 displacement into the memory address, but I don't have an AMD K6 machine
4945 handy to check this theory. */
4946
4947 if (TARGET_K6
4948 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4949 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4950 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4951 cost += 10;
4952
4953 return cost;
4954 }
4955 \f
4956 /* If X is a machine specific address (i.e. a symbol or label being
4957 referenced as a displacement from the GOT implemented using an
4958 UNSPEC), then return the base term. Otherwise return X. */
4959
4960 rtx
4961 ix86_find_base_term (x)
4962 rtx x;
4963 {
4964 rtx term;
4965
4966 if (TARGET_64BIT)
4967 {
4968 if (GET_CODE (x) != CONST)
4969 return x;
4970 term = XEXP (x, 0);
4971 if (GET_CODE (term) == PLUS
4972 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4973 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4974 term = XEXP (term, 0);
4975 if (GET_CODE (term) != UNSPEC
4976 || XINT (term, 1) != UNSPEC_GOTPCREL)
4977 return x;
4978
4979 term = XVECEXP (term, 0, 0);
4980
4981 if (GET_CODE (term) != SYMBOL_REF
4982 && GET_CODE (term) != LABEL_REF)
4983 return x;
4984
4985 return term;
4986 }
4987
4988 if (GET_CODE (x) != PLUS
4989 || XEXP (x, 0) != pic_offset_table_rtx
4990 || GET_CODE (XEXP (x, 1)) != CONST)
4991 return x;
4992
4993 term = XEXP (XEXP (x, 1), 0);
4994
4995 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4996 term = XEXP (term, 0);
4997
4998 if (GET_CODE (term) != UNSPEC
4999 || XINT (term, 1) != UNSPEC_GOTOFF)
5000 return x;
5001
5002 term = XVECEXP (term, 0, 0);
5003
5004 if (GET_CODE (term) != SYMBOL_REF
5005 && GET_CODE (term) != LABEL_REF)
5006 return x;
5007
5008 return term;
5009 }
5010 \f
5011 /* Determine if a given RTX is a valid constant. We already know this
5012 satisfies CONSTANT_P. */
5013
5014 bool
5015 legitimate_constant_p (x)
5016 rtx x;
5017 {
5018 rtx inner;
5019
5020 switch (GET_CODE (x))
5021 {
5022 case SYMBOL_REF:
5023 /* TLS symbols are not constant. */
5024 if (tls_symbolic_operand (x, Pmode))
5025 return false;
5026 break;
5027
5028 case CONST:
5029 inner = XEXP (x, 0);
5030
5031 /* Offsets of TLS symbols are never valid.
5032 Discourage CSE from creating them. */
5033 if (GET_CODE (inner) == PLUS
5034 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5035 return false;
5036
5037 /* Only some unspecs are valid as "constants". */
5038 if (GET_CODE (inner) == UNSPEC)
5039 switch (XINT (inner, 1))
5040 {
5041 case UNSPEC_TPOFF:
5042 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5043 default:
5044 return false;
5045 }
5046 break;
5047
5048 default:
5049 break;
5050 }
5051
5052 /* Otherwise we handle everything else in the move patterns. */
5053 return true;
5054 }
5055
5056 /* Determine if it's legal to put X into the constant pool. This
5057 is not possible for the address of thread-local symbols, which
5058 is checked above. */
5059
5060 static bool
5061 ix86_cannot_force_const_mem (x)
5062 rtx x;
5063 {
5064 return !legitimate_constant_p (x);
5065 }
5066
5067 /* Determine if a given RTX is a valid constant address. */
5068
5069 bool
5070 constant_address_p (x)
5071 rtx x;
5072 {
5073 switch (GET_CODE (x))
5074 {
5075 case LABEL_REF:
5076 case CONST_INT:
5077 return true;
5078
5079 case CONST_DOUBLE:
5080 return TARGET_64BIT;
5081
5082 case CONST:
5083 /* For Mach-O, really believe the CONST. */
5084 if (TARGET_MACHO)
5085 return true;
5086 /* Otherwise fall through. */
5087 case SYMBOL_REF:
5088 return !flag_pic && legitimate_constant_p (x);
5089
5090 default:
5091 return false;
5092 }
5093 }
5094
5095 /* Nonzero if the constant value X is a legitimate general operand
5096 when generating PIC code. It is given that flag_pic is on and
5097 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5098
5099 bool
5100 legitimate_pic_operand_p (x)
5101 rtx x;
5102 {
5103 rtx inner;
5104
5105 switch (GET_CODE (x))
5106 {
5107 case CONST:
5108 inner = XEXP (x, 0);
5109
5110 /* Only some unspecs are valid as "constants". */
5111 if (GET_CODE (inner) == UNSPEC)
5112 switch (XINT (inner, 1))
5113 {
5114 case UNSPEC_TPOFF:
5115 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5116 default:
5117 return false;
5118 }
5119 /* FALLTHRU */
5120
5121 case SYMBOL_REF:
5122 case LABEL_REF:
5123 return legitimate_pic_address_disp_p (x);
5124
5125 default:
5126 return true;
5127 }
5128 }
5129
5130 /* Determine if a given CONST RTX is a valid memory displacement
5131 in PIC mode. */
5132
5133 int
5134 legitimate_pic_address_disp_p (disp)
5135 register rtx disp;
5136 {
5137 bool saw_plus;
5138
5139 /* In 64bit mode we can allow direct addresses of symbols and labels
5140 when they are not dynamic symbols. */
5141 if (TARGET_64BIT)
5142 {
5143 /* TLS references should always be enclosed in UNSPEC. */
5144 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5145 return 0;
5146 if (GET_CODE (disp) == SYMBOL_REF
5147 && ix86_cmodel == CM_SMALL_PIC
5148 && (CONSTANT_POOL_ADDRESS_P (disp)
5149 || SYMBOL_REF_FLAG (disp)))
5150 return 1;
5151 if (GET_CODE (disp) == LABEL_REF)
5152 return 1;
5153 if (GET_CODE (disp) == CONST
5154 && GET_CODE (XEXP (disp, 0)) == PLUS
5155 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5156 && ix86_cmodel == CM_SMALL_PIC
5157 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5158 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5159 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5160 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5161 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5162 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5163 return 1;
5164 }
5165 if (GET_CODE (disp) != CONST)
5166 return 0;
5167 disp = XEXP (disp, 0);
5168
5169 if (TARGET_64BIT)
5170 {
5171 /* It is unsafe to allow PLUS expressions here. This limits the allowed
5172 distance of GOT references, but we should not need them anyway. */
5173 if (GET_CODE (disp) != UNSPEC
5174 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5175 return 0;
5176
5177 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5178 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5179 return 0;
5180 return 1;
5181 }
5182
5183 saw_plus = false;
5184 if (GET_CODE (disp) == PLUS)
5185 {
5186 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5187 return 0;
5188 disp = XEXP (disp, 0);
5189 saw_plus = true;
5190 }
5191
5192 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5193 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5194 {
5195 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5196 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5197 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5198 {
5199 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5200 if (strstr (sym_name, "$pb") != 0)
5201 return 1;
5202 }
5203 }
5204
5205 if (GET_CODE (disp) != UNSPEC)
5206 return 0;
5207
5208 switch (XINT (disp, 1))
5209 {
5210 case UNSPEC_GOT:
5211 if (saw_plus)
5212 return false;
5213 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5214 case UNSPEC_GOTOFF:
5215 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5216 case UNSPEC_GOTTPOFF:
5217 case UNSPEC_GOTNTPOFF:
5218 case UNSPEC_INDNTPOFF:
5219 if (saw_plus)
5220 return false;
5221 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5222 case UNSPEC_NTPOFF:
5223 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5224 case UNSPEC_DTPOFF:
5225 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5226 }
5227
5228 return 0;
5229 }
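
/* Illustrative examples (not in the original source) of displacements the
   function above accepts in 32-bit PIC mode:

     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))            ; x@GOTOFF
     (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)
                  (const_int 4)))                                 ; x@GOTOFF+4
     (const (unspec [(symbol_ref "f")] UNSPEC_GOT))               ; f@GOT

   while on 64-bit targets the UNSPEC form must be UNSPEC_GOTPCREL
   (sym@GOTPCREL).  */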
5230
5231 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5232 memory address for an instruction. The MODE argument is the machine mode
5233 for the MEM expression that wants to use this address.
5234
5235 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5236 convert common non-canonical forms to canonical form so that they will
5237 be recognized. */
5238
5239 int
5240 legitimate_address_p (mode, addr, strict)
5241 enum machine_mode mode;
5242 register rtx addr;
5243 int strict;
5244 {
5245 struct ix86_address parts;
5246 rtx base, index, disp;
5247 HOST_WIDE_INT scale;
5248 const char *reason = NULL;
5249 rtx reason_rtx = NULL_RTX;
5250
5251 if (TARGET_DEBUG_ADDR)
5252 {
5253 fprintf (stderr,
5254 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5255 GET_MODE_NAME (mode), strict);
5256 debug_rtx (addr);
5257 }
5258
5259 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5260 {
5261 if (TARGET_DEBUG_ADDR)
5262 fprintf (stderr, "Success.\n");
5263 return TRUE;
5264 }
5265
5266 if (ix86_decompose_address (addr, &parts) <= 0)
5267 {
5268 reason = "decomposition failed";
5269 goto report_error;
5270 }
5271
5272 base = parts.base;
5273 index = parts.index;
5274 disp = parts.disp;
5275 scale = parts.scale;
5276
5277 /* Validate base register.
5278
5279 Don't allow SUBREGs here; they can lead to spill failures when the base
5280 is one word out of a two-word structure, which is represented internally
5281 as a DImode int. */
5282
5283 if (base)
5284 {
5285 rtx reg;
5286 reason_rtx = base;
5287
5288 if (GET_CODE (base) == SUBREG)
5289 reg = SUBREG_REG (base);
5290 else
5291 reg = base;
5292
5293 if (GET_CODE (reg) != REG)
5294 {
5295 reason = "base is not a register";
5296 goto report_error;
5297 }
5298
5299 if (GET_MODE (base) != Pmode)
5300 {
5301 reason = "base is not in Pmode";
5302 goto report_error;
5303 }
5304
5305 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5306 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5307 {
5308 reason = "base is not valid";
5309 goto report_error;
5310 }
5311 }
5312
5313 /* Validate index register.
5314
5315 Don't allow SUBREGs here; they can lead to spill failures when the index
5316 is one word out of a two-word structure, which is represented internally
5317 as a DImode int. */
5318
5319 if (index)
5320 {
5321 rtx reg;
5322 reason_rtx = index;
5323
5324 if (GET_CODE (index) == SUBREG)
5325 reg = SUBREG_REG (index);
5326 else
5327 reg = index;
5328
5329 if (GET_CODE (reg) != REG)
5330 {
5331 reason = "index is not a register";
5332 goto report_error;
5333 }
5334
5335 if (GET_MODE (index) != Pmode)
5336 {
5337 reason = "index is not in Pmode";
5338 goto report_error;
5339 }
5340
5341 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5342 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5343 {
5344 reason = "index is not valid";
5345 goto report_error;
5346 }
5347 }
5348
5349 /* Validate scale factor. */
5350 if (scale != 1)
5351 {
5352 reason_rtx = GEN_INT (scale);
5353 if (!index)
5354 {
5355 reason = "scale without index";
5356 goto report_error;
5357 }
5358
5359 if (scale != 2 && scale != 4 && scale != 8)
5360 {
5361 reason = "scale is not a valid multiplier";
5362 goto report_error;
5363 }
5364 }
5365
5366 /* Validate displacement. */
5367 if (disp)
5368 {
5369 reason_rtx = disp;
5370
5371 if (GET_CODE (disp) == CONST
5372 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5373 switch (XINT (XEXP (disp, 0), 1))
5374 {
5375 case UNSPEC_GOT:
5376 case UNSPEC_GOTOFF:
5377 case UNSPEC_GOTPCREL:
5378 if (!flag_pic)
5379 abort ();
5380 goto is_legitimate_pic;
5381
5382 case UNSPEC_GOTTPOFF:
5383 case UNSPEC_GOTNTPOFF:
5384 case UNSPEC_INDNTPOFF:
5385 case UNSPEC_NTPOFF:
5386 case UNSPEC_DTPOFF:
5387 break;
5388
5389 default:
5390 reason = "invalid address unspec";
5391 goto report_error;
5392 }
5393
5394 else if (flag_pic && (SYMBOLIC_CONST (disp)
5395 #if TARGET_MACHO
5396 && !machopic_operand_p (disp)
5397 #endif
5398 ))
5399 {
5400 is_legitimate_pic:
5401 if (TARGET_64BIT && (index || base))
5402 {
5403 /* foo@dtpoff(%rX) is ok. */
5404 if (GET_CODE (disp) != CONST
5405 || GET_CODE (XEXP (disp, 0)) != PLUS
5406 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5407 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5408 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5409 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5410 {
5411 reason = "non-constant pic memory reference";
5412 goto report_error;
5413 }
5414 }
5415 else if (! legitimate_pic_address_disp_p (disp))
5416 {
5417 reason = "displacement is an invalid pic construct";
5418 goto report_error;
5419 }
5420
5421 /* This code used to verify that a symbolic pic displacement
5422 includes the pic_offset_table_rtx register.
5423 
5424 While this is a good idea, unfortunately these constructs may
5425 be created by the "adds using lea" optimization for incorrect
5426 code like:
5427 
5428 int a;
5429 int foo(int i)
5430 {
5431 return *(&a+i);
5432 }
5433 
5434 This code is nonsensical, but it results in addressing the
5435 GOT table with a pic_offset_table_rtx base. We can't
5436 easily refuse it, since it gets matched by the
5437 "addsi3" pattern, which later gets split into a lea when the
5438 output register differs from the input. While this
5439 could be handled by a separate addsi pattern for this case
5440 that never results in a lea, disabling this test seems to be
5441 the easier and correct fix for the crash. */
5442 }
5443 else if (!CONSTANT_ADDRESS_P (disp))
5444 {
5445 reason = "displacement is not constant";
5446 goto report_error;
5447 }
5448 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5449 {
5450 reason = "displacement is out of range";
5451 goto report_error;
5452 }
5453 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5454 {
5455 reason = "displacement is a const_double";
5456 goto report_error;
5457 }
5458 }
5459
5460 /* Everything looks valid. */
5461 if (TARGET_DEBUG_ADDR)
5462 fprintf (stderr, "Success.\n");
5463 return TRUE;
5464
5465 report_error:
5466 if (TARGET_DEBUG_ADDR)
5467 {
5468 fprintf (stderr, "Error: %s\n", reason);
5469 debug_rtx (reason_rtx);
5470 }
5471 return FALSE;
5472 }
5473 \f
5474 /* Return a unique alias set for the GOT. */
5475
5476 static HOST_WIDE_INT
5477 ix86_GOT_alias_set ()
5478 {
5479 static HOST_WIDE_INT set = -1;
5480 if (set == -1)
5481 set = new_alias_set ();
5482 return set;
5483 }
5484
5485 /* Return a legitimate reference for ORIG (an address) using the
5486 register REG. If REG is 0, a new pseudo is generated.
5487
5488 There are two types of references that must be handled:
5489
5490 1. Global data references must load the address from the GOT, via
5491 the PIC reg. An insn is emitted to do this load, and the reg is
5492 returned.
5493
5494 2. Static data references, constant pool addresses, and code labels
5495 compute the address as an offset from the GOT, whose base is in
5496 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5497 differentiate them from global data objects. The returned
5498 address is the PIC reg + an unspec constant.
5499
5500 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5501 reg also appears in the address. */
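
/* For illustration only (32-bit, -fpic; the exact RTL and relocations
   depend on the target flags), the two cases above come out roughly as:

     static int s;   ->  (plus pic_offset_table_rtx
                                (const (unspec [s] UNSPEC_GOTOFF)))
                          i.e. the constant offset s@GOTOFF from the PIC base

     extern int g;   ->  (mem (plus pic_offset_table_rtx
                                     (const (unspec [g] UNSPEC_GOT))))
                          i.e. a load of g's address from the GOT  */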
5502
5503 rtx
5504 legitimize_pic_address (orig, reg)
5505 rtx orig;
5506 rtx reg;
5507 {
5508 rtx addr = orig;
5509 rtx new = orig;
5510 rtx base;
5511
5512 #if TARGET_MACHO
5513 if (reg == 0)
5514 reg = gen_reg_rtx (Pmode);
5515 /* Use the generic Mach-O PIC machinery. */
5516 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5517 #endif
5518
5519 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5520 new = addr;
5521 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5522 {
5523 /* This symbol may be referenced via a displacement from the PIC
5524 base address (@GOTOFF). */
5525
5526 if (reload_in_progress)
5527 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5528 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5529 new = gen_rtx_CONST (Pmode, new);
5530 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5531
5532 if (reg != 0)
5533 {
5534 emit_move_insn (reg, new);
5535 new = reg;
5536 }
5537 }
5538 else if (GET_CODE (addr) == SYMBOL_REF)
5539 {
5540 if (TARGET_64BIT)
5541 {
5542 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5543 new = gen_rtx_CONST (Pmode, new);
5544 new = gen_rtx_MEM (Pmode, new);
5545 RTX_UNCHANGING_P (new) = 1;
5546 set_mem_alias_set (new, ix86_GOT_alias_set ());
5547
5548 if (reg == 0)
5549 reg = gen_reg_rtx (Pmode);
5550 /* Use gen_movsi directly; otherwise the address is loaded
5551 into a register for CSE. We don't want to CSE these addresses;
5552 instead we CSE addresses from the GOT table, so skip this. */
5553 emit_insn (gen_movsi (reg, new));
5554 new = reg;
5555 }
5556 else
5557 {
5558 /* This symbol must be referenced via a load from the
5559 Global Offset Table (@GOT). */
5560
5561 if (reload_in_progress)
5562 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5563 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5564 new = gen_rtx_CONST (Pmode, new);
5565 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5566 new = gen_rtx_MEM (Pmode, new);
5567 RTX_UNCHANGING_P (new) = 1;
5568 set_mem_alias_set (new, ix86_GOT_alias_set ());
5569
5570 if (reg == 0)
5571 reg = gen_reg_rtx (Pmode);
5572 emit_move_insn (reg, new);
5573 new = reg;
5574 }
5575 }
5576 else
5577 {
5578 if (GET_CODE (addr) == CONST)
5579 {
5580 addr = XEXP (addr, 0);
5581
5582 /* We must match what we generated earlier. Assume the only
5583 unspecs that can get here are ours; not that we could do
5584 anything with them anyway... */
5585 if (GET_CODE (addr) == UNSPEC
5586 || (GET_CODE (addr) == PLUS
5587 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5588 return orig;
5589 if (GET_CODE (addr) != PLUS)
5590 abort ();
5591 }
5592 if (GET_CODE (addr) == PLUS)
5593 {
5594 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5595
5596 /* Check first to see if this is a constant offset from a @GOTOFF
5597 symbol reference. */
5598 if (local_symbolic_operand (op0, Pmode)
5599 && GET_CODE (op1) == CONST_INT)
5600 {
5601 if (!TARGET_64BIT)
5602 {
5603 if (reload_in_progress)
5604 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5605 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5606 UNSPEC_GOTOFF);
5607 new = gen_rtx_PLUS (Pmode, new, op1);
5608 new = gen_rtx_CONST (Pmode, new);
5609 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5610
5611 if (reg != 0)
5612 {
5613 emit_move_insn (reg, new);
5614 new = reg;
5615 }
5616 }
5617 else
5618 {
5619 if (INTVAL (op1) < -16*1024*1024
5620 || INTVAL (op1) >= 16*1024*1024)
5621 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5622 }
5623 }
5624 else
5625 {
5626 base = legitimize_pic_address (XEXP (addr, 0), reg);
5627 new = legitimize_pic_address (XEXP (addr, 1),
5628 base == reg ? NULL_RTX : reg);
5629
5630 if (GET_CODE (new) == CONST_INT)
5631 new = plus_constant (base, INTVAL (new));
5632 else
5633 {
5634 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5635 {
5636 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5637 new = XEXP (new, 1);
5638 }
5639 new = gen_rtx_PLUS (Pmode, base, new);
5640 }
5641 }
5642 }
5643 }
5644 return new;
5645 }
5646
5647 static void
5648 ix86_encode_section_info (decl, first)
5649 tree decl;
5650 int first ATTRIBUTE_UNUSED;
5651 {
5652 bool local_p = (*targetm.binds_local_p) (decl);
5653 rtx rtl, symbol;
5654
5655 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5656 if (GET_CODE (rtl) != MEM)
5657 return;
5658 symbol = XEXP (rtl, 0);
5659 if (GET_CODE (symbol) != SYMBOL_REF)
5660 return;
5661
5662 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5663 symbol so that we may access it directly in the GOT. */
5664
5665 if (flag_pic)
5666 SYMBOL_REF_FLAG (symbol) = local_p;
5667
5668 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5669 "local dynamic", "initial exec" or "local exec" TLS models
5670 respectively. */
5671
5672 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5673 {
5674 const char *symbol_str;
5675 char *newstr;
5676 size_t len;
5677 enum tls_model kind = decl_tls_model (decl);
5678
5679 if (TARGET_64BIT && ! flag_pic)
5680 {
5681 /* x86-64 doesn't allow non-pic code for shared libraries,
5682 so don't generate GD/LD TLS models for non-pic code. */
5683 switch (kind)
5684 {
5685 case TLS_MODEL_GLOBAL_DYNAMIC:
5686 kind = TLS_MODEL_INITIAL_EXEC; break;
5687 case TLS_MODEL_LOCAL_DYNAMIC:
5688 kind = TLS_MODEL_LOCAL_EXEC; break;
5689 default:
5690 break;
5691 }
5692 }
5693
5694 symbol_str = XSTR (symbol, 0);
5695
5696 if (symbol_str[0] == '%')
5697 {
5698 if (symbol_str[1] == tls_model_chars[kind])
5699 return;
5700 symbol_str += 2;
5701 }
5702 len = strlen (symbol_str) + 1;
5703 newstr = alloca (len + 2);
5704
5705 newstr[0] = '%';
5706 newstr[1] = tls_model_chars[kind];
5707 memcpy (newstr + 2, symbol_str, len);
5708
5709 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5710 }
5711 }
5712
5713 /* Undo the above when printing symbol names. */
5714
5715 static const char *
5716 ix86_strip_name_encoding (str)
5717 const char *str;
5718 {
5719 if (str[0] == '%')
5720 str += 2;
5721 if (str [0] == '*')
5722 str += 1;
5723 return str;
5724 }
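
/* A sketch of the encoding round trip implemented by the two routines
   above, assuming the %[GLil] convention described earlier: a thread-local
   symbol "foo" accessed with the initial-exec model gets the assembler
   name "%ifoo"; ix86_strip_name_encoding later drops the two-character
   prefix (and any leading '*') so that plain "foo" is printed again.  */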
5725 \f
5726 /* Load the thread pointer into a register. */
5727
5728 static rtx
5729 get_thread_pointer ()
5730 {
5731 rtx tp;
5732
5733 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5734 tp = gen_rtx_MEM (Pmode, tp);
5735 RTX_UNCHANGING_P (tp) = 1;
5736 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5737 tp = force_reg (Pmode, tp);
5738
5739 return tp;
5740 }
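
/* For reference, the (mem (unspec ... UNSPEC_TP)) built above is printed
   by print_operand_address below as the segment-relative address %gs:0 on
   32-bit targets and %fs:0 on 64-bit targets, which is where the thread
   pointer is found under the usual ELF TLS conventions.  */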
5741
5742 /* Try machine-dependent ways of modifying an illegitimate address
5743 to be legitimate. If we find one, return the new, valid address.
5744 This macro is used in only one place: `memory_address' in explow.c.
5745
5746 OLDX is the address as it was before break_out_memory_refs was called.
5747 In some cases it is useful to look at this to decide what needs to be done.
5748
5749 MODE and WIN are passed so that this macro can use
5750 GO_IF_LEGITIMATE_ADDRESS.
5751
5752 It is always safe for this macro to do nothing. It exists to recognize
5753 opportunities to optimize the output.
5754
5755 For the 80386, we handle X+REG by loading X into a register R and
5756 using R+REG. R will go in a general reg and indexing will be used.
5757 However, if REG is a broken-out memory address or multiplication,
5758 nothing needs to be done because REG can certainly go in a general reg.
5759
5760 When -fpic is used, special handling is needed for symbolic references.
5761 See comments by legitimize_pic_address in i386.c for details. */
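
/* A small non-PIC illustration of the X+REG handling mentioned above:
   for an address such as (plus (reg R) (symbol_ref "tbl")), the fallback
   code at the end of this function copies the symbolic part into a fresh
   pseudo with force_operand/emit_move_insn and returns the sum of the two
   registers, which base+index addressing can then match.  */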
5762
5763 rtx
5764 legitimize_address (x, oldx, mode)
5765 register rtx x;
5766 register rtx oldx ATTRIBUTE_UNUSED;
5767 enum machine_mode mode;
5768 {
5769 int changed = 0;
5770 unsigned log;
5771
5772 if (TARGET_DEBUG_ADDR)
5773 {
5774 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5775 GET_MODE_NAME (mode));
5776 debug_rtx (x);
5777 }
5778
5779 log = tls_symbolic_operand (x, mode);
5780 if (log)
5781 {
5782 rtx dest, base, off, pic;
5783 int type;
5784
5785 switch (log)
5786 {
5787 case TLS_MODEL_GLOBAL_DYNAMIC:
5788 dest = gen_reg_rtx (Pmode);
5789 if (TARGET_64BIT)
5790 {
5791 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5792
5793 start_sequence ();
5794 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5795 insns = get_insns ();
5796 end_sequence ();
5797
5798 emit_libcall_block (insns, dest, rax, x);
5799 }
5800 else
5801 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5802 break;
5803
5804 case TLS_MODEL_LOCAL_DYNAMIC:
5805 base = gen_reg_rtx (Pmode);
5806 if (TARGET_64BIT)
5807 {
5808 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5809
5810 start_sequence ();
5811 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5812 insns = get_insns ();
5813 end_sequence ();
5814
5815 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5816 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5817 emit_libcall_block (insns, base, rax, note);
5818 }
5819 else
5820 emit_insn (gen_tls_local_dynamic_base_32 (base));
5821
5822 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5823 off = gen_rtx_CONST (Pmode, off);
5824
5825 return gen_rtx_PLUS (Pmode, base, off);
5826
5827 case TLS_MODEL_INITIAL_EXEC:
5828 if (TARGET_64BIT)
5829 {
5830 pic = NULL;
5831 type = UNSPEC_GOTNTPOFF;
5832 }
5833 else if (flag_pic)
5834 {
5835 if (reload_in_progress)
5836 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5837 pic = pic_offset_table_rtx;
5838 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5839 }
5840 else if (!TARGET_GNU_TLS)
5841 {
5842 pic = gen_reg_rtx (Pmode);
5843 emit_insn (gen_set_got (pic));
5844 type = UNSPEC_GOTTPOFF;
5845 }
5846 else
5847 {
5848 pic = NULL;
5849 type = UNSPEC_INDNTPOFF;
5850 }
5851
5852 base = get_thread_pointer ();
5853
5854 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5855 off = gen_rtx_CONST (Pmode, off);
5856 if (pic)
5857 off = gen_rtx_PLUS (Pmode, pic, off);
5858 off = gen_rtx_MEM (Pmode, off);
5859 RTX_UNCHANGING_P (off) = 1;
5860 set_mem_alias_set (off, ix86_GOT_alias_set ());
5861 dest = gen_reg_rtx (Pmode);
5862
5863 if (TARGET_64BIT || TARGET_GNU_TLS)
5864 {
5865 emit_move_insn (dest, off);
5866 return gen_rtx_PLUS (Pmode, base, dest);
5867 }
5868 else
5869 emit_insn (gen_subsi3 (dest, base, off));
5870 break;
5871
5872 case TLS_MODEL_LOCAL_EXEC:
5873 base = get_thread_pointer ();
5874
5875 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5876 (TARGET_64BIT || TARGET_GNU_TLS)
5877 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5878 off = gen_rtx_CONST (Pmode, off);
5879
5880 if (TARGET_64BIT || TARGET_GNU_TLS)
5881 return gen_rtx_PLUS (Pmode, base, off);
5882 else
5883 {
5884 dest = gen_reg_rtx (Pmode);
5885 emit_insn (gen_subsi3 (dest, base, off));
5886 }
5887 break;
5888
5889 default:
5890 abort ();
5891 }
5892
5893 return dest;
5894 }
5895
5896 if (flag_pic && SYMBOLIC_CONST (x))
5897 return legitimize_pic_address (x, 0);
5898
5899 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5900 if (GET_CODE (x) == ASHIFT
5901 && GET_CODE (XEXP (x, 1)) == CONST_INT
5902 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5903 {
5904 changed = 1;
5905 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5906 GEN_INT (1 << log));
5907 }
5908
5909 if (GET_CODE (x) == PLUS)
5910 {
5911 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5912
5913 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5914 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5915 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5916 {
5917 changed = 1;
5918 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5919 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5920 GEN_INT (1 << log));
5921 }
5922
5923 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5924 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5925 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5926 {
5927 changed = 1;
5928 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5929 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5930 GEN_INT (1 << log));
5931 }
5932
5933 /* Put multiply first if it isn't already. */
5934 if (GET_CODE (XEXP (x, 1)) == MULT)
5935 {
5936 rtx tmp = XEXP (x, 0);
5937 XEXP (x, 0) = XEXP (x, 1);
5938 XEXP (x, 1) = tmp;
5939 changed = 1;
5940 }
5941
5942 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5943 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5944 created by virtual register instantiation, register elimination, and
5945 similar optimizations. */
5946 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5947 {
5948 changed = 1;
5949 x = gen_rtx_PLUS (Pmode,
5950 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5951 XEXP (XEXP (x, 1), 0)),
5952 XEXP (XEXP (x, 1), 1));
5953 }
5954
5955 /* Canonicalize
5956 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5957 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5958 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5959 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5960 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5961 && CONSTANT_P (XEXP (x, 1)))
5962 {
5963 rtx constant;
5964 rtx other = NULL_RTX;
5965
5966 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5967 {
5968 constant = XEXP (x, 1);
5969 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5970 }
5971 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5972 {
5973 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5974 other = XEXP (x, 1);
5975 }
5976 else
5977 constant = 0;
5978
5979 if (constant)
5980 {
5981 changed = 1;
5982 x = gen_rtx_PLUS (Pmode,
5983 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5984 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5985 plus_constant (other, INTVAL (constant)));
5986 }
5987 }
5988
5989 if (changed && legitimate_address_p (mode, x, FALSE))
5990 return x;
5991
5992 if (GET_CODE (XEXP (x, 0)) == MULT)
5993 {
5994 changed = 1;
5995 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5996 }
5997
5998 if (GET_CODE (XEXP (x, 1)) == MULT)
5999 {
6000 changed = 1;
6001 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6002 }
6003
6004 if (changed
6005 && GET_CODE (XEXP (x, 1)) == REG
6006 && GET_CODE (XEXP (x, 0)) == REG)
6007 return x;
6008
6009 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6010 {
6011 changed = 1;
6012 x = legitimize_pic_address (x, 0);
6013 }
6014
6015 if (changed && legitimate_address_p (mode, x, FALSE))
6016 return x;
6017
6018 if (GET_CODE (XEXP (x, 0)) == REG)
6019 {
6020 register rtx temp = gen_reg_rtx (Pmode);
6021 register rtx val = force_operand (XEXP (x, 1), temp);
6022 if (val != temp)
6023 emit_move_insn (temp, val);
6024
6025 XEXP (x, 1) = temp;
6026 return x;
6027 }
6028
6029 else if (GET_CODE (XEXP (x, 1)) == REG)
6030 {
6031 register rtx temp = gen_reg_rtx (Pmode);
6032 register rtx val = force_operand (XEXP (x, 0), temp);
6033 if (val != temp)
6034 emit_move_insn (temp, val);
6035
6036 XEXP (x, 0) = temp;
6037 return x;
6038 }
6039 }
6040
6041 return x;
6042 }
6043 \f
6044 /* Print an integer constant expression in assembler syntax. Addition
6045 and subtraction are the only arithmetic that may appear in these
6046 expressions. FILE is the stdio stream to write to, X is the rtx, and
6047 CODE is the operand print code from the output string. */
6048
6049 static void
6050 output_pic_addr_const (file, x, code)
6051 FILE *file;
6052 rtx x;
6053 int code;
6054 {
6055 char buf[256];
6056
6057 switch (GET_CODE (x))
6058 {
6059 case PC:
6060 if (flag_pic)
6061 putc ('.', file);
6062 else
6063 abort ();
6064 break;
6065
6066 case SYMBOL_REF:
6067 assemble_name (file, XSTR (x, 0));
6068 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6069 fputs ("@PLT", file);
6070 break;
6071
6072 case LABEL_REF:
6073 x = XEXP (x, 0);
6074 /* FALLTHRU */
6075 case CODE_LABEL:
6076 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6077 assemble_name (asm_out_file, buf);
6078 break;
6079
6080 case CONST_INT:
6081 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6082 break;
6083
6084 case CONST:
6085 /* This used to output parentheses around the expression,
6086 but that does not work on the 386 (either ATT or BSD assembler). */
6087 output_pic_addr_const (file, XEXP (x, 0), code);
6088 break;
6089
6090 case CONST_DOUBLE:
6091 if (GET_MODE (x) == VOIDmode)
6092 {
6093 /* We can use %d if the number is <32 bits and positive. */
6094 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6095 fprintf (file, "0x%lx%08lx",
6096 (unsigned long) CONST_DOUBLE_HIGH (x),
6097 (unsigned long) CONST_DOUBLE_LOW (x));
6098 else
6099 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6100 }
6101 else
6102 /* We can't handle floating point constants;
6103 PRINT_OPERAND must handle them. */
6104 output_operand_lossage ("floating constant misused");
6105 break;
6106
6107 case PLUS:
6108 /* Some assemblers need integer constants to appear first. */
6109 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6110 {
6111 output_pic_addr_const (file, XEXP (x, 0), code);
6112 putc ('+', file);
6113 output_pic_addr_const (file, XEXP (x, 1), code);
6114 }
6115 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6116 {
6117 output_pic_addr_const (file, XEXP (x, 1), code);
6118 putc ('+', file);
6119 output_pic_addr_const (file, XEXP (x, 0), code);
6120 }
6121 else
6122 abort ();
6123 break;
6124
6125 case MINUS:
6126 if (!TARGET_MACHO)
6127 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6128 output_pic_addr_const (file, XEXP (x, 0), code);
6129 putc ('-', file);
6130 output_pic_addr_const (file, XEXP (x, 1), code);
6131 if (!TARGET_MACHO)
6132 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6133 break;
6134
6135 case UNSPEC:
6136 if (XVECLEN (x, 0) != 1)
6137 abort ();
6138 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6139 switch (XINT (x, 1))
6140 {
6141 case UNSPEC_GOT:
6142 fputs ("@GOT", file);
6143 break;
6144 case UNSPEC_GOTOFF:
6145 fputs ("@GOTOFF", file);
6146 break;
6147 case UNSPEC_GOTPCREL:
6148 fputs ("@GOTPCREL(%rip)", file);
6149 break;
6150 case UNSPEC_GOTTPOFF:
6151 /* FIXME: This might be @TPOFF in Sun ld too. */
6152 fputs ("@GOTTPOFF", file);
6153 break;
6154 case UNSPEC_TPOFF:
6155 fputs ("@TPOFF", file);
6156 break;
6157 case UNSPEC_NTPOFF:
6158 if (TARGET_64BIT)
6159 fputs ("@TPOFF", file);
6160 else
6161 fputs ("@NTPOFF", file);
6162 break;
6163 case UNSPEC_DTPOFF:
6164 fputs ("@DTPOFF", file);
6165 break;
6166 case UNSPEC_GOTNTPOFF:
6167 if (TARGET_64BIT)
6168 fputs ("@GOTTPOFF(%rip)", file);
6169 else
6170 fputs ("@GOTNTPOFF", file);
6171 break;
6172 case UNSPEC_INDNTPOFF:
6173 fputs ("@INDNTPOFF", file);
6174 break;
6175 default:
6176 output_operand_lossage ("invalid UNSPEC as operand");
6177 break;
6178 }
6179 break;
6180
6181 default:
6182 output_operand_lossage ("invalid expression as operand");
6183 }
6184 }
6185
6186 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6187 We need to handle our special PIC relocations. */
6188
6189 void
6190 i386_dwarf_output_addr_const (file, x)
6191 FILE *file;
6192 rtx x;
6193 {
6194 #ifdef ASM_QUAD
6195 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6196 #else
6197 if (TARGET_64BIT)
6198 abort ();
6199 fprintf (file, "%s", ASM_LONG);
6200 #endif
6201 if (flag_pic)
6202 output_pic_addr_const (file, x, '\0');
6203 else
6204 output_addr_const (file, x);
6205 fputc ('\n', file);
6206 }
6207
6208 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6209 We need to emit DTP-relative relocations. */
6210
6211 void
6212 i386_output_dwarf_dtprel (file, size, x)
6213 FILE *file;
6214 int size;
6215 rtx x;
6216 {
6217 fputs (ASM_LONG, file);
6218 output_addr_const (file, x);
6219 fputs ("@DTPOFF", file);
6220 switch (size)
6221 {
6222 case 4:
6223 break;
6224 case 8:
6225 fputs (", 0", file);
6226 break;
6227 default:
6228 abort ();
6229 }
6230 }
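
/* With the usual ELF definition of ASM_LONG this emits, for example,
   ".long x@DTPOFF" for a 4-byte entry and ".long x@DTPOFF, 0" for an
   8-byte one (illustrative; the exact directive text comes from the
   target's ASM_LONG).  */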
6231
6232 /* In the name of slightly smaller debug output, and to cater to
6233 general assembler lossage, recognize PIC+GOTOFF and turn it back
6234 into a direct symbol reference. */
6235
6236 rtx
6237 i386_simplify_dwarf_addr (orig_x)
6238 rtx orig_x;
6239 {
6240 rtx x = orig_x, y;
6241
6242 if (GET_CODE (x) == MEM)
6243 x = XEXP (x, 0);
6244
6245 if (TARGET_64BIT)
6246 {
6247 if (GET_CODE (x) != CONST
6248 || GET_CODE (XEXP (x, 0)) != UNSPEC
6249 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6250 || GET_CODE (orig_x) != MEM)
6251 return orig_x;
6252 return XVECEXP (XEXP (x, 0), 0, 0);
6253 }
6254
6255 if (GET_CODE (x) != PLUS
6256 || GET_CODE (XEXP (x, 1)) != CONST)
6257 return orig_x;
6258
6259 if (GET_CODE (XEXP (x, 0)) == REG
6260 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6261 /* %ebx + GOT/GOTOFF */
6262 y = NULL;
6263 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6264 {
6265 /* %ebx + %reg * scale + GOT/GOTOFF */
6266 y = XEXP (x, 0);
6267 if (GET_CODE (XEXP (y, 0)) == REG
6268 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6269 y = XEXP (y, 1);
6270 else if (GET_CODE (XEXP (y, 1)) == REG
6271 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6272 y = XEXP (y, 0);
6273 else
6274 return orig_x;
6275 if (GET_CODE (y) != REG
6276 && GET_CODE (y) != MULT
6277 && GET_CODE (y) != ASHIFT)
6278 return orig_x;
6279 }
6280 else
6281 return orig_x;
6282
6283 x = XEXP (XEXP (x, 1), 0);
6284 if (GET_CODE (x) == UNSPEC
6285 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6286 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6287 {
6288 if (y)
6289 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6290 return XVECEXP (x, 0, 0);
6291 }
6292
6293 if (GET_CODE (x) == PLUS
6294 && GET_CODE (XEXP (x, 0)) == UNSPEC
6295 && GET_CODE (XEXP (x, 1)) == CONST_INT
6296 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6297 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6298 && GET_CODE (orig_x) != MEM)))
6299 {
6300 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6301 if (y)
6302 return gen_rtx_PLUS (Pmode, y, x);
6303 return x;
6304 }
6305
6306 return orig_x;
6307 }
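
/* Illustrative example of the simplification above: a 32-bit PIC access
   such as (mem (plus (reg %ebx) (const (unspec [sym] UNSPEC_GOT)))) is
   rewritten back to plain sym for the debug output, and a @GOTOFF form
   not wrapped in a MEM is handled the same way.  */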
6308 \f
6309 static void
6310 put_condition_code (code, mode, reverse, fp, file)
6311 enum rtx_code code;
6312 enum machine_mode mode;
6313 int reverse, fp;
6314 FILE *file;
6315 {
6316 const char *suffix;
6317
6318 if (mode == CCFPmode || mode == CCFPUmode)
6319 {
6320 enum rtx_code second_code, bypass_code;
6321 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6322 if (bypass_code != NIL || second_code != NIL)
6323 abort ();
6324 code = ix86_fp_compare_code_to_integer (code);
6325 mode = CCmode;
6326 }
6327 if (reverse)
6328 code = reverse_condition (code);
6329
6330 switch (code)
6331 {
6332 case EQ:
6333 suffix = "e";
6334 break;
6335 case NE:
6336 suffix = "ne";
6337 break;
6338 case GT:
6339 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6340 abort ();
6341 suffix = "g";
6342 break;
6343 case GTU:
6344 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6345 Those same assemblers have the same but opposite lossage on cmov. */
6346 if (mode != CCmode)
6347 abort ();
6348 suffix = fp ? "nbe" : "a";
6349 break;
6350 case LT:
6351 if (mode == CCNOmode || mode == CCGOCmode)
6352 suffix = "s";
6353 else if (mode == CCmode || mode == CCGCmode)
6354 suffix = "l";
6355 else
6356 abort ();
6357 break;
6358 case LTU:
6359 if (mode != CCmode)
6360 abort ();
6361 suffix = "b";
6362 break;
6363 case GE:
6364 if (mode == CCNOmode || mode == CCGOCmode)
6365 suffix = "ns";
6366 else if (mode == CCmode || mode == CCGCmode)
6367 suffix = "ge";
6368 else
6369 abort ();
6370 break;
6371 case GEU:
6372 /* ??? As above. */
6373 if (mode != CCmode)
6374 abort ();
6375 suffix = fp ? "nb" : "ae";
6376 break;
6377 case LE:
6378 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6379 abort ();
6380 suffix = "le";
6381 break;
6382 case LEU:
6383 if (mode != CCmode)
6384 abort ();
6385 suffix = "be";
6386 break;
6387 case UNORDERED:
6388 suffix = fp ? "u" : "p";
6389 break;
6390 case ORDERED:
6391 suffix = fp ? "nu" : "np";
6392 break;
6393 default:
6394 abort ();
6395 }
6396 fputs (suffix, file);
6397 }
6398
6399 void
6400 print_reg (x, code, file)
6401 rtx x;
6402 int code;
6403 FILE *file;
6404 {
6405 if (REGNO (x) == ARG_POINTER_REGNUM
6406 || REGNO (x) == FRAME_POINTER_REGNUM
6407 || REGNO (x) == FLAGS_REG
6408 || REGNO (x) == FPSR_REG)
6409 abort ();
6410
6411 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6412 putc ('%', file);
6413
6414 if (code == 'w' || MMX_REG_P (x))
6415 code = 2;
6416 else if (code == 'b')
6417 code = 1;
6418 else if (code == 'k')
6419 code = 4;
6420 else if (code == 'q')
6421 code = 8;
6422 else if (code == 'y')
6423 code = 3;
6424 else if (code == 'h')
6425 code = 0;
6426 else
6427 code = GET_MODE_SIZE (GET_MODE (x));
6428
6429 /* Irritatingly, AMD extended registers use a different naming convention
6430 from the normal registers. */
6431 if (REX_INT_REG_P (x))
6432 {
6433 if (!TARGET_64BIT)
6434 abort ();
6435 switch (code)
6436 {
6437 case 0:
6438 error ("extended registers have no high halves");
6439 break;
6440 case 1:
6441 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6442 break;
6443 case 2:
6444 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6445 break;
6446 case 4:
6447 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6448 break;
6449 case 8:
6450 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6451 break;
6452 default:
6453 error ("unsupported operand size for extended register");
6454 break;
6455 }
6456 return;
6457 }
6458 switch (code)
6459 {
6460 case 3:
6461 if (STACK_TOP_P (x))
6462 {
6463 fputs ("st(0)", file);
6464 break;
6465 }
6466 /* FALLTHRU */
6467 case 8:
6468 case 4:
6469 case 12:
6470 if (! ANY_FP_REG_P (x))
6471 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6472 /* FALLTHRU */
6473 case 16:
6474 case 2:
6475 fputs (hi_reg_name[REGNO (x)], file);
6476 break;
6477 case 1:
6478 fputs (qi_reg_name[REGNO (x)], file);
6479 break;
6480 case 0:
6481 fputs (qi_high_reg_name[REGNO (x)], file);
6482 break;
6483 default:
6484 abort ();
6485 }
6486 }
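
/* A few illustrative outputs of print_reg in the AT&T dialect: for
   (reg:SI ax), code 'k'/4 prints "%eax", 'w'/2 prints "%ax", 'b'/1 prints
   "%al" and 'h'/0 prints "%ah"; with code 'q'/8 on a 64-bit target the
   same register prints as "%rax".  The extended registers r8-r15 instead
   use the "r%i" forms handled just above.  */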
6487
6488 /* Locate some local-dynamic symbol still in use by this function
6489 so that we can print its name in some tls_local_dynamic_base
6490 pattern. */
6491
6492 static const char *
6493 get_some_local_dynamic_name ()
6494 {
6495 rtx insn;
6496
6497 if (cfun->machine->some_ld_name)
6498 return cfun->machine->some_ld_name;
6499
6500 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6501 if (INSN_P (insn)
6502 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6503 return cfun->machine->some_ld_name;
6504
6505 abort ();
6506 }
6507
6508 static int
6509 get_some_local_dynamic_name_1 (px, data)
6510 rtx *px;
6511 void *data ATTRIBUTE_UNUSED;
6512 {
6513 rtx x = *px;
6514
6515 if (GET_CODE (x) == SYMBOL_REF
6516 && local_dynamic_symbolic_operand (x, Pmode))
6517 {
6518 cfun->machine->some_ld_name = XSTR (x, 0);
6519 return 1;
6520 }
6521
6522 return 0;
6523 }
6524
6525 /* Meaning of CODE:
6526 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6527 C -- print opcode suffix for set/cmov insn.
6528 c -- like C, but print reversed condition
6529 F,f -- likewise, but for floating-point.
6530 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6531 nothing
6532 R -- print the prefix for register names.
6533 z -- print the opcode suffix for the size of the current operand.
6534 * -- print a star (in certain assembler syntax)
6535 A -- print an absolute memory reference.
6536 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6537 s -- print a shift double count, followed by the assembler's argument
6538 delimiter.
6539 b -- print the QImode name of the register for the indicated operand.
6540 %b0 would print %al if operands[0] is reg 0.
6541 w -- likewise, print the HImode name of the register.
6542 k -- likewise, print the SImode name of the register.
6543 q -- likewise, print the DImode name of the register.
6544 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6545 y -- print "st(0)" instead of "st" as a register.
6546 D -- print condition for SSE cmp instruction.
6547 P -- if PIC, print an @PLT suffix.
6548 X -- don't print any sort of PIC '@' suffix for a symbol.
6549 & -- print some in-use local-dynamic symbol name.
6550 */
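
/* For example (illustrative only), an insn template such as
   "add%z0\t{%1, %0|%0, %1}" prints as "addl $5, %eax" for SImode operands
   in AT&T syntax, while the part after '|' is used for the Intel dialect,
   which reverses the operand order and omits the size suffix.  A plain
   "%b0" in a template names the QImode part of operand 0, e.g. "%al".  */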
6551
6552 void
6553 print_operand (file, x, code)
6554 FILE *file;
6555 rtx x;
6556 int code;
6557 {
6558 if (code)
6559 {
6560 switch (code)
6561 {
6562 case '*':
6563 if (ASSEMBLER_DIALECT == ASM_ATT)
6564 putc ('*', file);
6565 return;
6566
6567 case '&':
6568 assemble_name (file, get_some_local_dynamic_name ());
6569 return;
6570
6571 case 'A':
6572 if (ASSEMBLER_DIALECT == ASM_ATT)
6573 putc ('*', file);
6574 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6575 {
6576 /* Intel syntax. For absolute addresses, registers should not
6577 be surrounded by brackets. */
6578 if (GET_CODE (x) != REG)
6579 {
6580 putc ('[', file);
6581 PRINT_OPERAND (file, x, 0);
6582 putc (']', file);
6583 return;
6584 }
6585 }
6586 else
6587 abort ();
6588
6589 PRINT_OPERAND (file, x, 0);
6590 return;
6591
6592
6593 case 'L':
6594 if (ASSEMBLER_DIALECT == ASM_ATT)
6595 putc ('l', file);
6596 return;
6597
6598 case 'W':
6599 if (ASSEMBLER_DIALECT == ASM_ATT)
6600 putc ('w', file);
6601 return;
6602
6603 case 'B':
6604 if (ASSEMBLER_DIALECT == ASM_ATT)
6605 putc ('b', file);
6606 return;
6607
6608 case 'Q':
6609 if (ASSEMBLER_DIALECT == ASM_ATT)
6610 putc ('l', file);
6611 return;
6612
6613 case 'S':
6614 if (ASSEMBLER_DIALECT == ASM_ATT)
6615 putc ('s', file);
6616 return;
6617
6618 case 'T':
6619 if (ASSEMBLER_DIALECT == ASM_ATT)
6620 putc ('t', file);
6621 return;
6622
6623 case 'z':
6624 /* 387 opcodes don't get size suffixes if the operands are
6625 registers. */
6626 if (STACK_REG_P (x))
6627 return;
6628
6629 /* Likewise if using Intel opcodes. */
6630 if (ASSEMBLER_DIALECT == ASM_INTEL)
6631 return;
6632
6633 /* This is the size of op from size of operand. */
6634 switch (GET_MODE_SIZE (GET_MODE (x)))
6635 {
6636 case 2:
6637 #ifdef HAVE_GAS_FILDS_FISTS
6638 putc ('s', file);
6639 #endif
6640 return;
6641
6642 case 4:
6643 if (GET_MODE (x) == SFmode)
6644 {
6645 putc ('s', file);
6646 return;
6647 }
6648 else
6649 putc ('l', file);
6650 return;
6651
6652 case 12:
6653 case 16:
6654 putc ('t', file);
6655 return;
6656
6657 case 8:
6658 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6659 {
6660 #ifdef GAS_MNEMONICS
6661 putc ('q', file);
6662 #else
6663 putc ('l', file);
6664 putc ('l', file);
6665 #endif
6666 }
6667 else
6668 putc ('l', file);
6669 return;
6670
6671 default:
6672 abort ();
6673 }
6674
6675 case 'b':
6676 case 'w':
6677 case 'k':
6678 case 'q':
6679 case 'h':
6680 case 'y':
6681 case 'X':
6682 case 'P':
6683 break;
6684
6685 case 's':
6686 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6687 {
6688 PRINT_OPERAND (file, x, 0);
6689 putc (',', file);
6690 }
6691 return;
6692
6693 case 'D':
6694 /* A little bit of brain damage here. The SSE compare instructions
6695 use completely different names for the comparisons than the
6696 fp conditional moves do. */
6697 switch (GET_CODE (x))
6698 {
6699 case EQ:
6700 case UNEQ:
6701 fputs ("eq", file);
6702 break;
6703 case LT:
6704 case UNLT:
6705 fputs ("lt", file);
6706 break;
6707 case LE:
6708 case UNLE:
6709 fputs ("le", file);
6710 break;
6711 case UNORDERED:
6712 fputs ("unord", file);
6713 break;
6714 case NE:
6715 case LTGT:
6716 fputs ("neq", file);
6717 break;
6718 case UNGE:
6719 case GE:
6720 fputs ("nlt", file);
6721 break;
6722 case UNGT:
6723 case GT:
6724 fputs ("nle", file);
6725 break;
6726 case ORDERED:
6727 fputs ("ord", file);
6728 break;
6729 default:
6730 abort ();
6731 break;
6732 }
6733 return;
6734 case 'O':
6735 #ifdef CMOV_SUN_AS_SYNTAX
6736 if (ASSEMBLER_DIALECT == ASM_ATT)
6737 {
6738 switch (GET_MODE (x))
6739 {
6740 case HImode: putc ('w', file); break;
6741 case SImode:
6742 case SFmode: putc ('l', file); break;
6743 case DImode:
6744 case DFmode: putc ('q', file); break;
6745 default: abort ();
6746 }
6747 putc ('.', file);
6748 }
6749 #endif
6750 return;
6751 case 'C':
6752 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6753 return;
6754 case 'F':
6755 #ifdef CMOV_SUN_AS_SYNTAX
6756 if (ASSEMBLER_DIALECT == ASM_ATT)
6757 putc ('.', file);
6758 #endif
6759 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6760 return;
6761
6762 /* Like above, but reverse condition */
6763 case 'c':
6764 /* Check to see if argument to %c is really a constant
6765 and not a condition code which needs to be reversed. */
6766 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6767 {
6768 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6769 return;
6770 }
6771 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6772 return;
6773 case 'f':
6774 #ifdef CMOV_SUN_AS_SYNTAX
6775 if (ASSEMBLER_DIALECT == ASM_ATT)
6776 putc ('.', file);
6777 #endif
6778 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6779 return;
6780 case '+':
6781 {
6782 rtx x;
6783
6784 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6785 return;
6786
6787 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6788 if (x)
6789 {
6790 int pred_val = INTVAL (XEXP (x, 0));
6791
6792 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6793 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6794 {
6795 int taken = pred_val > REG_BR_PROB_BASE / 2;
6796 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6797
6798 /* Emit hints only in the case the default branch prediction
6799 heuristics would fail. */
6800 if (taken != cputaken)
6801 {
6802 /* We use 3e (DS) prefix for taken branches and
6803 2e (CS) prefix for not taken branches. */
6804 if (taken)
6805 fputs ("ds ; ", file);
6806 else
6807 fputs ("cs ; ", file);
6808 }
6809 }
6810 }
6811 return;
6812 }
6813 default:
6814 output_operand_lossage ("invalid operand code `%c'", code);
6815 }
6816 }
6817
6818 if (GET_CODE (x) == REG)
6819 {
6820 PRINT_REG (x, code, file);
6821 }
6822
6823 else if (GET_CODE (x) == MEM)
6824 {
6825 /* No `byte ptr' prefix for call instructions. */
6826 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6827 {
6828 const char * size;
6829 switch (GET_MODE_SIZE (GET_MODE (x)))
6830 {
6831 case 1: size = "BYTE"; break;
6832 case 2: size = "WORD"; break;
6833 case 4: size = "DWORD"; break;
6834 case 8: size = "QWORD"; break;
6835 case 12: size = "XWORD"; break;
6836 case 16: size = "XMMWORD"; break;
6837 default:
6838 abort ();
6839 }
6840
6841 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6842 if (code == 'b')
6843 size = "BYTE";
6844 else if (code == 'w')
6845 size = "WORD";
6846 else if (code == 'k')
6847 size = "DWORD";
6848
6849 fputs (size, file);
6850 fputs (" PTR ", file);
6851 }
6852
6853 x = XEXP (x, 0);
6854 if (flag_pic && CONSTANT_ADDRESS_P (x))
6855 output_pic_addr_const (file, x, code);
6856 /* Avoid (%rip) for call operands. */
6857 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6858 && GET_CODE (x) != CONST_INT)
6859 output_addr_const (file, x);
6860 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6861 output_operand_lossage ("invalid constraints for operand");
6862 else
6863 output_address (x);
6864 }
6865
6866 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6867 {
6868 REAL_VALUE_TYPE r;
6869 long l;
6870
6871 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6872 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6873
6874 if (ASSEMBLER_DIALECT == ASM_ATT)
6875 putc ('$', file);
6876 fprintf (file, "0x%lx", l);
6877 }
6878
6879 /* These float cases don't actually occur as immediate operands. */
6880 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6881 {
6882 char dstr[30];
6883
6884 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6885 fprintf (file, "%s", dstr);
6886 }
6887
6888 else if (GET_CODE (x) == CONST_DOUBLE
6889 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6890 {
6891 char dstr[30];
6892
6893 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6894 fprintf (file, "%s", dstr);
6895 }
6896
6897 else
6898 {
6899 if (code != 'P')
6900 {
6901 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6902 {
6903 if (ASSEMBLER_DIALECT == ASM_ATT)
6904 putc ('$', file);
6905 }
6906 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6907 || GET_CODE (x) == LABEL_REF)
6908 {
6909 if (ASSEMBLER_DIALECT == ASM_ATT)
6910 putc ('$', file);
6911 else
6912 fputs ("OFFSET FLAT:", file);
6913 }
6914 }
6915 if (GET_CODE (x) == CONST_INT)
6916 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6917 else if (flag_pic)
6918 output_pic_addr_const (file, x, code);
6919 else
6920 output_addr_const (file, x);
6921 }
6922 }
6923 \f
6924 /* Print a memory operand whose address is ADDR. */
6925
6926 void
6927 print_operand_address (file, addr)
6928 FILE *file;
6929 register rtx addr;
6930 {
6931 struct ix86_address parts;
6932 rtx base, index, disp;
6933 int scale;
6934
6935 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6936 {
6937 if (ASSEMBLER_DIALECT == ASM_INTEL)
6938 fputs ("DWORD PTR ", file);
6939 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6940 putc ('%', file);
6941 if (TARGET_64BIT)
6942 fputs ("fs:0", file);
6943 else
6944 fputs ("gs:0", file);
6945 return;
6946 }
6947
6948 if (! ix86_decompose_address (addr, &parts))
6949 abort ();
6950
6951 base = parts.base;
6952 index = parts.index;
6953 disp = parts.disp;
6954 scale = parts.scale;
6955
6956 if (!base && !index)
6957 {
6958 /* Displacement only requires special attention. */
6959
6960 if (GET_CODE (disp) == CONST_INT)
6961 {
6962 if (ASSEMBLER_DIALECT == ASM_INTEL)
6963 {
6964 if (USER_LABEL_PREFIX[0] == 0)
6965 putc ('%', file);
6966 fputs ("ds:", file);
6967 }
6968 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6969 }
6970 else if (flag_pic)
6971 output_pic_addr_const (file, addr, 0);
6972 else
6973 output_addr_const (file, addr);
6974
6975 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
6976 if (TARGET_64BIT
6977 && ((GET_CODE (addr) == SYMBOL_REF
6978 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
6979 || GET_CODE (addr) == LABEL_REF
6980 || (GET_CODE (addr) == CONST
6981 && GET_CODE (XEXP (addr, 0)) == PLUS
6982 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6983 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6984 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6985 fputs ("(%rip)", file);
6986 }
6987 else
6988 {
6989 if (ASSEMBLER_DIALECT == ASM_ATT)
6990 {
6991 if (disp)
6992 {
6993 if (flag_pic)
6994 output_pic_addr_const (file, disp, 0);
6995 else if (GET_CODE (disp) == LABEL_REF)
6996 output_asm_label (disp);
6997 else
6998 output_addr_const (file, disp);
6999 }
7000
7001 putc ('(', file);
7002 if (base)
7003 PRINT_REG (base, 0, file);
7004 if (index)
7005 {
7006 putc (',', file);
7007 PRINT_REG (index, 0, file);
7008 if (scale != 1)
7009 fprintf (file, ",%d", scale);
7010 }
7011 putc (')', file);
7012 }
7013 else
7014 {
7015 rtx offset = NULL_RTX;
7016
7017 if (disp)
7018 {
7019 /* Pull out the offset of a symbol; print any symbol itself. */
7020 if (GET_CODE (disp) == CONST
7021 && GET_CODE (XEXP (disp, 0)) == PLUS
7022 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7023 {
7024 offset = XEXP (XEXP (disp, 0), 1);
7025 disp = gen_rtx_CONST (VOIDmode,
7026 XEXP (XEXP (disp, 0), 0));
7027 }
7028
7029 if (flag_pic)
7030 output_pic_addr_const (file, disp, 0);
7031 else if (GET_CODE (disp) == LABEL_REF)
7032 output_asm_label (disp);
7033 else if (GET_CODE (disp) == CONST_INT)
7034 offset = disp;
7035 else
7036 output_addr_const (file, disp);
7037 }
7038
7039 putc ('[', file);
7040 if (base)
7041 {
7042 PRINT_REG (base, 0, file);
7043 if (offset)
7044 {
7045 if (INTVAL (offset) >= 0)
7046 putc ('+', file);
7047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7048 }
7049 }
7050 else if (offset)
7051 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7052 else
7053 putc ('0', file);
7054
7055 if (index)
7056 {
7057 putc ('+', file);
7058 PRINT_REG (index, 0, file);
7059 if (scale != 1)
7060 fprintf (file, "*%d", scale);
7061 }
7062 putc (']', file);
7063 }
7064 }
7065 }
7066
7067 bool
7068 output_addr_const_extra (file, x)
7069 FILE *file;
7070 rtx x;
7071 {
7072 rtx op;
7073
7074 if (GET_CODE (x) != UNSPEC)
7075 return false;
7076
7077 op = XVECEXP (x, 0, 0);
7078 switch (XINT (x, 1))
7079 {
7080 case UNSPEC_GOTTPOFF:
7081 output_addr_const (file, op);
7082 /* FIXME: This might be @TPOFF in Sun ld. */
7083 fputs ("@GOTTPOFF", file);
7084 break;
7085 case UNSPEC_TPOFF:
7086 output_addr_const (file, op);
7087 fputs ("@TPOFF", file);
7088 break;
7089 case UNSPEC_NTPOFF:
7090 output_addr_const (file, op);
7091 if (TARGET_64BIT)
7092 fputs ("@TPOFF", file);
7093 else
7094 fputs ("@NTPOFF", file);
7095 break;
7096 case UNSPEC_DTPOFF:
7097 output_addr_const (file, op);
7098 fputs ("@DTPOFF", file);
7099 break;
7100 case UNSPEC_GOTNTPOFF:
7101 output_addr_const (file, op);
7102 if (TARGET_64BIT)
7103 fputs ("@GOTTPOFF(%rip)", file);
7104 else
7105 fputs ("@GOTNTPOFF", file);
7106 break;
7107 case UNSPEC_INDNTPOFF:
7108 output_addr_const (file, op);
7109 fputs ("@INDNTPOFF", file);
7110 break;
7111
7112 default:
7113 return false;
7114 }
7115
7116 return true;
7117 }
7118 \f
7119 /* Split one or more DImode RTL references into pairs of SImode
7120 references. The RTL can be REG, offsettable MEM, integer constant, or
7121 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7122 split and "num" is its length. lo_half and hi_half are output arrays
7123 that parallel "operands". */
7124
7125 void
7126 split_di (operands, num, lo_half, hi_half)
7127 rtx operands[];
7128 int num;
7129 rtx lo_half[], hi_half[];
7130 {
7131 while (num--)
7132 {
7133 rtx op = operands[num];
7134
7135 /* simplify_subreg refuses to split volatile memory addresses,
7136 but we still have to handle them. */
7137 if (GET_CODE (op) == MEM)
7138 {
7139 lo_half[num] = adjust_address (op, SImode, 0);
7140 hi_half[num] = adjust_address (op, SImode, 4);
7141 }
7142 else
7143 {
7144 lo_half[num] = simplify_gen_subreg (SImode, op,
7145 GET_MODE (op) == VOIDmode
7146 ? DImode : GET_MODE (op), 0);
7147 hi_half[num] = simplify_gen_subreg (SImode, op,
7148 GET_MODE (op) == VOIDmode
7149 ? DImode : GET_MODE (op), 4);
7150 }
7151 }
7152 }
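
/* An illustrative use of split_di (hypothetical operands, little-endian
   byte offsets as used above): the DImode constant 0x0000000100000002
   splits into lo_half = (const_int 0x2) and hi_half = (const_int 0x1),
   while a DImode MEM splits into two SImode MEMs at offsets 0 and 4.  */
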
7153 /* Split one or more TImode RTL references into pairs of DImode
7154 references. The RTL can be REG, offsettable MEM, integer constant, or
7155 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7156 split and "num" is its length. lo_half and hi_half are output arrays
7157 that parallel "operands". */
7158
7159 void
7160 split_ti (operands, num, lo_half, hi_half)
7161 rtx operands[];
7162 int num;
7163 rtx lo_half[], hi_half[];
7164 {
7165 while (num--)
7166 {
7167 rtx op = operands[num];
7168
7169 /* simplify_subreg refuses to split volatile memory addresses, but we
7170 still have to handle them. */
7171 if (GET_CODE (op) == MEM)
7172 {
7173 lo_half[num] = adjust_address (op, DImode, 0);
7174 hi_half[num] = adjust_address (op, DImode, 8);
7175 }
7176 else
7177 {
7178 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7179 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7180 }
7181 }
7182 }
7183 \f
7184 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7185 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7186 is the expression of the binary operation. The output may either be
7187 emitted here, or returned to the caller, like all output_* functions.
7188
7189 There is no guarantee that the operands are the same mode, as they
7190 might be within FLOAT or FLOAT_EXTEND expressions. */
7191
7192 #ifndef SYSV386_COMPAT
7193 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7194 wants to fix the assemblers because that causes incompatibility
7195 with gcc. No-one wants to fix gcc because that causes
7196 incompatibility with assemblers... You can use the option of
7197 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7198 #define SYSV386_COMPAT 1
7199 #endif
7200
7201 const char *
7202 output_387_binary_op (insn, operands)
7203 rtx insn;
7204 rtx *operands;
7205 {
7206 static char buf[30];
7207 const char *p;
7208 const char *ssep;
7209 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7210
7211 #ifdef ENABLE_CHECKING
7212 /* Even if we do not want to check the inputs, this documents the input
7213 constraints, which helps in understanding the following code. */
7214 if (STACK_REG_P (operands[0])
7215 && ((REG_P (operands[1])
7216 && REGNO (operands[0]) == REGNO (operands[1])
7217 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7218 || (REG_P (operands[2])
7219 && REGNO (operands[0]) == REGNO (operands[2])
7220 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7221 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7222 ; /* ok */
7223 else if (!is_sse)
7224 abort ();
7225 #endif
7226
7227 switch (GET_CODE (operands[3]))
7228 {
7229 case PLUS:
7230 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7231 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7232 p = "fiadd";
7233 else
7234 p = "fadd";
7235 ssep = "add";
7236 break;
7237
7238 case MINUS:
7239 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7240 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7241 p = "fisub";
7242 else
7243 p = "fsub";
7244 ssep = "sub";
7245 break;
7246
7247 case MULT:
7248 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7249 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7250 p = "fimul";
7251 else
7252 p = "fmul";
7253 ssep = "mul";
7254 break;
7255
7256 case DIV:
7257 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7258 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7259 p = "fidiv";
7260 else
7261 p = "fdiv";
7262 ssep = "div";
7263 break;
7264
7265 default:
7266 abort ();
7267 }
7268
7269 if (is_sse)
7270 {
7271 strcpy (buf, ssep);
7272 if (GET_MODE (operands[0]) == SFmode)
7273 strcat (buf, "ss\t{%2, %0|%0, %2}");
7274 else
7275 strcat (buf, "sd\t{%2, %0|%0, %2}");
7276 return buf;
7277 }
7278 strcpy (buf, p);
7279
7280 switch (GET_CODE (operands[3]))
7281 {
7282 case MULT:
7283 case PLUS:
7284 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7285 {
7286 rtx temp = operands[2];
7287 operands[2] = operands[1];
7288 operands[1] = temp;
7289 }
7290
7291 /* We know operands[0] == operands[1]. */
7292
7293 if (GET_CODE (operands[2]) == MEM)
7294 {
7295 p = "%z2\t%2";
7296 break;
7297 }
7298
7299 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7300 {
7301 if (STACK_TOP_P (operands[0]))
7302 /* How is it that we are storing to a dead operand[2]?
7303 Well, presumably operands[1] is dead too. We can't
7304 store the result to st(0) as st(0) gets popped on this
7305 instruction. Instead store to operands[2] (which I
7306 think has to be st(1)). st(1) will be popped later.
7307 gcc <= 2.8.1 didn't have this check and generated
7308 assembly code that the Unixware assembler rejected. */
7309 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7310 else
7311 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7312 break;
7313 }
7314
7315 if (STACK_TOP_P (operands[0]))
7316 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7317 else
7318 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7319 break;
7320
7321 case MINUS:
7322 case DIV:
7323 if (GET_CODE (operands[1]) == MEM)
7324 {
7325 p = "r%z1\t%1";
7326 break;
7327 }
7328
7329 if (GET_CODE (operands[2]) == MEM)
7330 {
7331 p = "%z2\t%2";
7332 break;
7333 }
7334
7335 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7336 {
7337 #if SYSV386_COMPAT
7338 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7339 derived assemblers, confusingly reverse the direction of
7340 the operation for fsub{r} and fdiv{r} when the
7341 destination register is not st(0). The Intel assembler
7342 doesn't have this brain damage. Read !SYSV386_COMPAT to
7343 figure out what the hardware really does. */
7344 if (STACK_TOP_P (operands[0]))
7345 p = "{p\t%0, %2|rp\t%2, %0}";
7346 else
7347 p = "{rp\t%2, %0|p\t%0, %2}";
7348 #else
7349 if (STACK_TOP_P (operands[0]))
7350 /* As above for fmul/fadd, we can't store to st(0). */
7351 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7352 else
7353 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7354 #endif
7355 break;
7356 }
7357
7358 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7359 {
7360 #if SYSV386_COMPAT
7361 if (STACK_TOP_P (operands[0]))
7362 p = "{rp\t%0, %1|p\t%1, %0}";
7363 else
7364 p = "{p\t%1, %0|rp\t%0, %1}";
7365 #else
7366 if (STACK_TOP_P (operands[0]))
7367 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7368 else
7369 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7370 #endif
7371 break;
7372 }
7373
7374 if (STACK_TOP_P (operands[0]))
7375 {
7376 if (STACK_TOP_P (operands[1]))
7377 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7378 else
7379 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7380 break;
7381 }
7382 else if (STACK_TOP_P (operands[1]))
7383 {
7384 #if SYSV386_COMPAT
7385 p = "{\t%1, %0|r\t%0, %1}";
7386 #else
7387 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7388 #endif
7389 }
7390 else
7391 {
7392 #if SYSV386_COMPAT
7393 p = "{r\t%2, %0|\t%0, %2}";
7394 #else
7395 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7396 #endif
7397 }
7398 break;
7399
7400 default:
7401 abort ();
7402 }
7403
7404 strcat (buf, p);
7405 return buf;
7406 }
7407
7408 /* Output code to initialize the control word copies used by the
7409 trunc?f?i patterns. NORMAL is set to the current control word, while
7410 ROUND_DOWN is set to a control word that truncates (rounds toward zero). */
7411 void
7412 emit_i387_cw_initialization (normal, round_down)
7413 rtx normal, round_down;
7414 {
7415 rtx reg = gen_reg_rtx (HImode);
7416
7417 emit_insn (gen_x86_fnstcw_1 (normal));
7418 emit_move_insn (reg, normal);
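/* Both variants below set the x87 rounding-control field (bits 10 and 11
   of the control word) to 0b11, i.e. round toward zero, as the trunc?f?i
   patterns require: 0xc00 ORed into the whole word, or 0xc inserted into
   the word's upper byte by the movsi_insv_1 pattern.  */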
7419 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7420 && !TARGET_64BIT)
7421 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7422 else
7423 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7424 emit_move_insn (round_down, reg);
7425 }
7426
7427 /* Output code for INSN to convert a float to a signed int. OPERANDS
7428 are the insn operands. The output may be [HSD]Imode and the input
7429 operand may be [SDX]Fmode. */
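
/* For a typical SImode result whose stack top dies, the sequence emitted
   below is (illustrative):
     fldcw %3          load the truncating control word
     fistpl %0         store the integer and pop
     fldcw %2          restore the original control word  */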
7430
7431 const char *
7432 output_fix_trunc (insn, operands)
7433 rtx insn;
7434 rtx *operands;
7435 {
7436 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7437 int dimode_p = GET_MODE (operands[0]) == DImode;
7438
7439 /* Jump through a hoop or two for DImode, since the hardware has no
7440 non-popping instruction. We used to do this a different way, but
7441 that was somewhat fragile and broke with post-reload splitters. */
7442 if (dimode_p && !stack_top_dies)
7443 output_asm_insn ("fld\t%y1", operands);
7444
7445 if (!STACK_TOP_P (operands[1]))
7446 abort ();
7447
7448 if (GET_CODE (operands[0]) != MEM)
7449 abort ();
7450
7451 output_asm_insn ("fldcw\t%3", operands);
7452 if (stack_top_dies || dimode_p)
7453 output_asm_insn ("fistp%z0\t%0", operands);
7454 else
7455 output_asm_insn ("fist%z0\t%0", operands);
7456 output_asm_insn ("fldcw\t%2", operands);
7457
7458 return "";
7459 }
7460
7461 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7462 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7463 when fucom should be used. */
7464
7465 const char *
7466 output_fp_compare (insn, operands, eflags_p, unordered_p)
7467 rtx insn;
7468 rtx *operands;
7469 int eflags_p, unordered_p;
7470 {
7471 int stack_top_dies;
7472 rtx cmp_op0 = operands[0];
7473 rtx cmp_op1 = operands[1];
7474 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7475
7476 if (eflags_p == 2)
7477 {
7478 cmp_op0 = cmp_op1;
7479 cmp_op1 = operands[2];
7480 }
7481 if (is_sse)
7482 {
7483 if (GET_MODE (operands[0]) == SFmode)
7484 if (unordered_p)
7485 return "ucomiss\t{%1, %0|%0, %1}";
7486 else
7487 return "comiss\t{%1, %0|%0, %y}";
7488 else
7489 if (unordered_p)
7490 return "ucomisd\t{%1, %0|%0, %1}";
7491 else
7492 return "comisd\t{%1, %0|%0, %y}";
7493 }
7494
7495 if (! STACK_TOP_P (cmp_op0))
7496 abort ();
7497
7498 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7499
7500 if (STACK_REG_P (cmp_op1)
7501 && stack_top_dies
7502 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7503 && REGNO (cmp_op1) != FIRST_STACK_REG)
7504 {
7505 /* If the top of the 387 stack dies, and the other operand
7506 is also a stack register that dies, then this must be a
7507 `fcompp' float compare. */
7508
7509 if (eflags_p == 1)
7510 {
7511 /* There is no double popping fcomi variant. Fortunately,
7512 eflags is immune from the fstp's cc clobbering. */
7513 if (unordered_p)
7514 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7515 else
7516 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7517 return "fstp\t%y0";
7518 }
7519 else
7520 {
7521 if (eflags_p == 2)
7522 {
7523 if (unordered_p)
7524 return "fucompp\n\tfnstsw\t%0";
7525 else
7526 return "fcompp\n\tfnstsw\t%0";
7527 }
7528 else
7529 {
7530 if (unordered_p)
7531 return "fucompp";
7532 else
7533 return "fcompp";
7534 }
7535 }
7536 }
7537 else
7538 {
7539 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7540
7541 static const char * const alt[24] =
7542 {
7543 "fcom%z1\t%y1",
7544 "fcomp%z1\t%y1",
7545 "fucom%z1\t%y1",
7546 "fucomp%z1\t%y1",
7547
7548 "ficom%z1\t%y1",
7549 "ficomp%z1\t%y1",
7550 NULL,
7551 NULL,
7552
7553 "fcomi\t{%y1, %0|%0, %y1}",
7554 "fcomip\t{%y1, %0|%0, %y1}",
7555 "fucomi\t{%y1, %0|%0, %y1}",
7556 "fucomip\t{%y1, %0|%0, %y1}",
7557
7558 NULL,
7559 NULL,
7560 NULL,
7561 NULL,
7562
7563 "fcom%z2\t%y2\n\tfnstsw\t%0",
7564 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7565 "fucom%z2\t%y2\n\tfnstsw\t%0",
7566 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7567
7568 "ficom%z2\t%y2\n\tfnstsw\t%0",
7569 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7570 NULL,
7571 NULL
7572 };
7573
7574 int mask;
7575 const char *ret;
7576
7577 mask = eflags_p << 3;
7578 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7579 mask |= unordered_p << 1;
7580 mask |= stack_top_dies;
7581
7582 if (mask >= 24)
7583 abort ();
7584 ret = alt[mask];
7585 if (ret == NULL)
7586 abort ();
7587
7588 return ret;
7589 }
7590 }
7591
7592 void
7593 ix86_output_addr_vec_elt (file, value)
7594 FILE *file;
7595 int value;
7596 {
7597 const char *directive = ASM_LONG;
7598
7599 if (TARGET_64BIT)
7600 {
7601 #ifdef ASM_QUAD
7602 directive = ASM_QUAD;
7603 #else
7604 abort ();
7605 #endif
7606 }
7607
7608 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7609 }
7610
7611 void
7612 ix86_output_addr_diff_elt (file, value, rel)
7613 FILE *file;
7614 int value, rel;
7615 {
7616 if (TARGET_64BIT)
7617 fprintf (file, "%s%s%d-%s%d\n",
7618 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7619 else if (HAVE_AS_GOTOFF_IN_DATA)
7620 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7621 #if TARGET_MACHO
7622 else if (TARGET_MACHO)
7623 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7624 machopic_function_base_name () + 1);
7625 #endif
7626 else
7627 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7628 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7629 }
7630 \f
7631 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7632 for the target. */
7633
7634 void
7635 ix86_expand_clear (dest)
7636 rtx dest;
7637 {
7638 rtx tmp;
7639
7640 /* We play register width games, which are only valid after reload. */
7641 if (!reload_completed)
7642 abort ();
7643
7644 /* Avoid HImode and its attendant prefix byte. */
7645 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7646 dest = gen_rtx_REG (SImode, REGNO (dest));
7647
7648 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7649
7650 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7651 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7652 {
7653 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7654 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7655 }
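  /* With the flags clobber attached, the insn matches the xor pattern and
     assembles to "xor{l} %reg, %reg" (2 bytes) rather than "mov{l} $0, %reg"
     (5 bytes); the clobber records that, unlike mov, xor overwrites the
     condition codes.  */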
7656
7657 emit_insn (tmp);
7658 }
7659
7660 /* X is an unchanging MEM. If it is a constant pool reference, return
7661 the constant pool rtx, else NULL. */
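/* In 32-bit PIC code such a reference has the form
   (mem (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF)))),
   so the PIC wrapper must be peeled off before the SYMBOL_REF test below.  */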
7662
7663 static rtx
7664 maybe_get_pool_constant (x)
7665 rtx x;
7666 {
7667 x = XEXP (x, 0);
7668
7669 if (flag_pic && ! TARGET_64BIT)
7670 {
7671 if (GET_CODE (x) != PLUS)
7672 return NULL_RTX;
7673 if (XEXP (x, 0) != pic_offset_table_rtx)
7674 return NULL_RTX;
7675 x = XEXP (x, 1);
7676 if (GET_CODE (x) != CONST)
7677 return NULL_RTX;
7678 x = XEXP (x, 0);
7679 if (GET_CODE (x) != UNSPEC)
7680 return NULL_RTX;
7681 if (XINT (x, 1) != UNSPEC_GOTOFF)
7682 return NULL_RTX;
7683 x = XVECEXP (x, 0, 0);
7684 }
7685
7686 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7687 return get_pool_constant (x);
7688
7689 return NULL_RTX;
7690 }
7691
7692 void
7693 ix86_expand_move (mode, operands)
7694 enum machine_mode mode;
7695 rtx operands[];
7696 {
7697 int strict = (reload_in_progress || reload_completed);
7698 rtx insn, op0, op1, tmp;
7699
7700 op0 = operands[0];
7701 op1 = operands[1];
7702
7703 if (tls_symbolic_operand (op1, Pmode))
7704 {
7705 op1 = legitimize_address (op1, op1, VOIDmode);
7706 if (GET_CODE (op0) == MEM)
7707 {
7708 tmp = gen_reg_rtx (mode);
7709 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7710 op1 = tmp;
7711 }
7712 }
7713 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7714 {
7715 #if TARGET_MACHO
7716 if (MACHOPIC_PURE)
7717 {
7718 rtx temp = ((reload_in_progress
7719 || ((op0 && GET_CODE (op0) == REG)
7720 && mode == Pmode))
7721 ? op0 : gen_reg_rtx (Pmode));
7722 op1 = machopic_indirect_data_reference (op1, temp);
7723 op1 = machopic_legitimize_pic_address (op1, mode,
7724 temp == op1 ? 0 : temp);
7725 }
7726 else
7727 {
7728 if (MACHOPIC_INDIRECT)
7729 op1 = machopic_indirect_data_reference (op1, 0);
7730 }
7731 if (op0 != op1)
7732 {
7733 insn = gen_rtx_SET (VOIDmode, op0, op1);
7734 emit_insn (insn);
7735 }
7736 return;
7737 #endif /* TARGET_MACHO */
7738 if (GET_CODE (op0) == MEM)
7739 op1 = force_reg (Pmode, op1);
7740 else
7741 {
7742 rtx temp = op0;
7743 if (GET_CODE (temp) != REG)
7744 temp = gen_reg_rtx (Pmode);
7745 temp = legitimize_pic_address (op1, temp);
7746 if (temp == op0)
7747 return;
7748 op1 = temp;
7749 }
7750 }
7751 else
7752 {
7753 if (GET_CODE (op0) == MEM
7754 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7755 || !push_operand (op0, mode))
7756 && GET_CODE (op1) == MEM)
7757 op1 = force_reg (mode, op1);
7758
7759 if (push_operand (op0, mode)
7760 && ! general_no_elim_operand (op1, mode))
7761 op1 = copy_to_mode_reg (mode, op1);
7762
7763 /* Force large constants in 64-bit compilation into a register
7764 so that they get CSEd.  */
7765 if (TARGET_64BIT && mode == DImode
7766 && immediate_operand (op1, mode)
7767 && !x86_64_zero_extended_value (op1)
7768 && !register_operand (op0, mode)
7769 && optimize && !reload_completed && !reload_in_progress)
7770 op1 = copy_to_mode_reg (mode, op1);
7771
7772 if (FLOAT_MODE_P (mode))
7773 {
7774 /* If we are loading a floating point constant to a register,
7775 force the value to memory now, since we'll get better code
7776 out of the back end.  */
7777
7778 if (strict)
7779 ;
7780 else if (GET_CODE (op1) == CONST_DOUBLE
7781 && register_operand (op0, mode))
7782 op1 = validize_mem (force_const_mem (mode, op1));
7783 }
7784 }
7785
7786 insn = gen_rtx_SET (VOIDmode, op0, op1);
7787
7788 emit_insn (insn);
7789 }
7790
7791 void
7792 ix86_expand_vector_move (mode, operands)
7793 enum machine_mode mode;
7794 rtx operands[];
7795 {
7796 /* Force constants other than zero into memory.  We do not know how
7797 the instructions used to build constants modify the upper 64 bits
7798 of the register; once we have that information we may be able
7799 to handle some of them more efficiently.  */
7800 if ((reload_in_progress | reload_completed) == 0
7801 && register_operand (operands[0], mode)
7802 && CONSTANT_P (operands[1]))
7803 operands[1] = force_const_mem (mode, operands[1]);
7804
7805 /* Make operand1 a register if it isn't already. */
7806 if (!no_new_pseudos
7807 && !register_operand (operands[0], mode)
7808 && !register_operand (operands[1], mode))
7809 {
7810 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7811 emit_move_insn (operands[0], temp);
7812 return;
7813 }
7814
7815 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7816 }
7817
7818 /* Attempt to expand a binary operator. Make the expansion closer to the
7819 actual machine than just general_operand, which will allow 3 separate
7820 memory references (one output, two input) in a single insn. */
7821
7822 void
7823 ix86_expand_binary_operator (code, mode, operands)
7824 enum rtx_code code;
7825 enum machine_mode mode;
7826 rtx operands[];
7827 {
7828 int matching_memory;
7829 rtx src1, src2, dst, op, clob;
7830
7831 dst = operands[0];
7832 src1 = operands[1];
7833 src2 = operands[2];
7834
7835 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7836 if (GET_RTX_CLASS (code) == 'c'
7837 && (rtx_equal_p (dst, src2)
7838 || immediate_operand (src1, mode)))
7839 {
7840 rtx temp = src1;
7841 src1 = src2;
7842 src2 = temp;
7843 }
7844
7845 /* If the destination is memory, and we do not have matching source
7846 operands, do things in registers. */
7847 matching_memory = 0;
7848 if (GET_CODE (dst) == MEM)
7849 {
7850 if (rtx_equal_p (dst, src1))
7851 matching_memory = 1;
7852 else if (GET_RTX_CLASS (code) == 'c'
7853 && rtx_equal_p (dst, src2))
7854 matching_memory = 2;
7855 else
7856 dst = gen_reg_rtx (mode);
7857 }
7858
7859 /* Both source operands cannot be in memory. */
7860 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7861 {
7862 if (matching_memory != 2)
7863 src2 = force_reg (mode, src2);
7864 else
7865 src1 = force_reg (mode, src1);
7866 }
7867
7868 /* If the operation is not commutable, source 1 cannot be a constant
7869 or non-matching memory. */
7870 if ((CONSTANT_P (src1)
7871 || (!matching_memory && GET_CODE (src1) == MEM))
7872 && GET_RTX_CLASS (code) != 'c')
7873 src1 = force_reg (mode, src1);
7874
7875 /* If optimizing, copy to regs to improve CSE */
7876 if (optimize && ! no_new_pseudos)
7877 {
7878 if (GET_CODE (dst) == MEM)
7879 dst = gen_reg_rtx (mode);
7880 if (GET_CODE (src1) == MEM)
7881 src1 = force_reg (mode, src1);
7882 if (GET_CODE (src2) == MEM)
7883 src2 = force_reg (mode, src2);
7884 }
7885
7886 /* Emit the instruction. */
7887
7888 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7889 if (reload_in_progress)
7890 {
7891 /* Reload doesn't know about the flags register, and doesn't know that
7892 it doesn't want to clobber it. We can only do this with PLUS. */
7893 if (code != PLUS)
7894 abort ();
7895 emit_insn (op);
7896 }
7897 else
7898 {
7899 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7900 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7901 }
7902
7903 /* Fix up the destination if needed. */
7904 if (dst != operands[0])
7905 emit_move_insn (operands[0], dst);
7906 }
7907
7908 /* Return TRUE or FALSE depending on whether the binary operator meets the
7909 appropriate constraints. */
7910
7911 int
7912 ix86_binary_operator_ok (code, mode, operands)
7913 enum rtx_code code;
7914 enum machine_mode mode ATTRIBUTE_UNUSED;
7915 rtx operands[3];
7916 {
7917 /* Both source operands cannot be in memory. */
7918 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7919 return 0;
7920 /* If the operation is not commutable, source 1 cannot be a constant. */
7921 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7922 return 0;
7923 /* If the destination is memory, we must have a matching source operand. */
7924 if (GET_CODE (operands[0]) == MEM
7925 && ! (rtx_equal_p (operands[0], operands[1])
7926 || (GET_RTX_CLASS (code) == 'c'
7927 && rtx_equal_p (operands[0], operands[2]))))
7928 return 0;
7929 /* If the operation is not commutable and the source 1 is memory, we must
7930 have a matching destination. */
7931 if (GET_CODE (operands[1]) == MEM
7932 && GET_RTX_CLASS (code) != 'c'
7933 && ! rtx_equal_p (operands[0], operands[1]))
7934 return 0;
7935 return 1;
7936 }
7937
7938 /* Attempt to expand a unary operator. Make the expansion closer to the
7939 actual machine than just general_operand, which will allow 2 separate
7940 memory references (one output, one input) in a single insn. */
7941
7942 void
7943 ix86_expand_unary_operator (code, mode, operands)
7944 enum rtx_code code;
7945 enum machine_mode mode;
7946 rtx operands[];
7947 {
7948 int matching_memory;
7949 rtx src, dst, op, clob;
7950
7951 dst = operands[0];
7952 src = operands[1];
7953
7954 /* If the destination is memory, and we do not have matching source
7955 operands, do things in registers. */
7956 matching_memory = 0;
7957 if (GET_CODE (dst) == MEM)
7958 {
7959 if (rtx_equal_p (dst, src))
7960 matching_memory = 1;
7961 else
7962 dst = gen_reg_rtx (mode);
7963 }
7964
7965 /* When source operand is memory, destination must match. */
7966 if (!matching_memory && GET_CODE (src) == MEM)
7967 src = force_reg (mode, src);
7968
7969 /* If optimizing, copy to regs to improve CSE */
7970 if (optimize && ! no_new_pseudos)
7971 {
7972 if (GET_CODE (dst) == MEM)
7973 dst = gen_reg_rtx (mode);
7974 if (GET_CODE (src) == MEM)
7975 src = force_reg (mode, src);
7976 }
7977
7978 /* Emit the instruction. */
7979
7980 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7981 if (reload_in_progress || code == NOT)
7982 {
7983 /* Reload doesn't know about the flags register, and doesn't know that
7984 it doesn't want to clobber it. */
7985 if (code != NOT)
7986 abort ();
7987 emit_insn (op);
7988 }
7989 else
7990 {
7991 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7992 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7993 }
7994
7995 /* Fix up the destination if needed. */
7996 if (dst != operands[0])
7997 emit_move_insn (operands[0], dst);
7998 }
7999
8000 /* Return TRUE or FALSE depending on whether the unary operator meets the
8001 appropriate constraints. */
8002
8003 int
8004 ix86_unary_operator_ok (code, mode, operands)
8005 enum rtx_code code ATTRIBUTE_UNUSED;
8006 enum machine_mode mode ATTRIBUTE_UNUSED;
8007 rtx operands[2] ATTRIBUTE_UNUSED;
8008 {
8009 /* If one of operands is memory, source and destination must match. */
8010 if ((GET_CODE (operands[0]) == MEM
8011 || GET_CODE (operands[1]) == MEM)
8012 && ! rtx_equal_p (operands[0], operands[1]))
8013 return FALSE;
8014 return TRUE;
8015 }
8016
8017 /* Return TRUE or FALSE depending on whether the first SET in INSN
8018 has source and destination with matching CC modes, and that the
8019 CC mode is at least as constrained as REQ_MODE. */
8020
8021 int
8022 ix86_match_ccmode (insn, req_mode)
8023 rtx insn;
8024 enum machine_mode req_mode;
8025 {
8026 rtx set;
8027 enum machine_mode set_mode;
8028
8029 set = PATTERN (insn);
8030 if (GET_CODE (set) == PARALLEL)
8031 set = XVECEXP (set, 0, 0);
8032 if (GET_CODE (set) != SET)
8033 abort ();
8034 if (GET_CODE (SET_SRC (set)) != COMPARE)
8035 abort ();
8036
8037 set_mode = GET_MODE (SET_DEST (set));
8038 switch (set_mode)
8039 {
8040 case CCNOmode:
8041 if (req_mode != CCNOmode
8042 && (req_mode != CCmode
8043 || XEXP (SET_SRC (set), 1) != const0_rtx))
8044 return 0;
8045 break;
8046 case CCmode:
8047 if (req_mode == CCGCmode)
8048 return 0;
8049 /* FALLTHRU */
8050 case CCGCmode:
8051 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8052 return 0;
8053 /* FALLTHRU */
8054 case CCGOCmode:
8055 if (req_mode == CCZmode)
8056 return 0;
8057 /* FALLTHRU */
8058 case CCZmode:
8059 break;
8060
8061 default:
8062 abort ();
8063 }
8064
8065 return (GET_MODE (SET_SRC (set)) == set_mode);
8066 }
8067
8068 /* Generate insn patterns to do an integer compare of OPERANDS. */
8069
8070 static rtx
8071 ix86_expand_int_compare (code, op0, op1)
8072 enum rtx_code code;
8073 rtx op0, op1;
8074 {
8075 enum machine_mode cmpmode;
8076 rtx tmp, flags;
8077
8078 cmpmode = SELECT_CC_MODE (code, op0, op1);
8079 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8080
8081 /* This is very simple, but making the interface the same as in the
8082 FP case makes the rest of the code easier. */
8083 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8084 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8085
8086 /* Return the test that should be put into the flags user, i.e.
8087 the bcc, scc, or cmov instruction. */
8088 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8089 }
8090
8091 /* Figure out whether to use ordered or unordered fp comparisons.
8092 Return the appropriate mode to use. */
8093
8094 enum machine_mode
8095 ix86_fp_compare_mode (code)
8096 enum rtx_code code ATTRIBUTE_UNUSED;
8097 {
8098 /* ??? In order to make all comparisons reversible, we do all comparisons
8099 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8100 all forms of trapping and nontrapping comparisons, we can make inequality
8101 comparisons trapping again, since it results in better code when using
8102 FCOM based compares. */
8103 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8104 }
8105
8106 enum machine_mode
8107 ix86_cc_mode (code, op0, op1)
8108 enum rtx_code code;
8109 rtx op0, op1;
8110 {
8111 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8112 return ix86_fp_compare_mode (code);
8113 switch (code)
8114 {
8115 /* Only zero flag is needed. */
8116 case EQ: /* ZF=0 */
8117 case NE: /* ZF!=0 */
8118 return CCZmode;
8119 /* Codes needing carry flag. */
8120 case GEU: /* CF=0 */
8121 case GTU: /* CF=0 & ZF=0 */
8122 case LTU: /* CF=1 */
8123 case LEU: /* CF=1 | ZF=1 */
8124 return CCmode;
8125 /* Codes possibly doable only with sign flag when
8126 comparing against zero. */
8127 case GE: /* SF=OF or SF=0 */
8128 case LT: /* SF<>OF or SF=1 */
8129 if (op1 == const0_rtx)
8130 return CCGOCmode;
8131 else
8132 /* For other cases Carry flag is not required. */
8133 return CCGCmode;
8134 /* Codes doable only with sign flag when comparing
8135 against zero, but we lack a jump instruction for it,
8136 so we need to use relational tests against the overflow flag,
8137 which thus needs to be zero.  */
8138 case GT: /* ZF=0 & SF=OF */
8139 case LE: /* ZF=1 | SF<>OF */
8140 if (op1 == const0_rtx)
8141 return CCNOmode;
8142 else
8143 return CCGCmode;
8144 /* The strcmp pattern does a (use flags), and combine may ask us for the
8145 proper mode.  */
8146 case USE:
8147 return CCmode;
8148 default:
8149 abort ();
8150 }
8151 }
8152
8153 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8154
8155 int
8156 ix86_use_fcomi_compare (code)
8157 enum rtx_code code ATTRIBUTE_UNUSED;
8158 {
8159 enum rtx_code swapped_code = swap_condition (code);
8160 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8161 || (ix86_fp_comparison_cost (swapped_code)
8162 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8163 }
8164
8165 /* Swap, force into registers, or otherwise massage the two operands
8166 to a fp comparison. The operands are updated in place; the new
8167 comparison code is returned.  */
8168
8169 static enum rtx_code
8170 ix86_prepare_fp_compare_args (code, pop0, pop1)
8171 enum rtx_code code;
8172 rtx *pop0, *pop1;
8173 {
8174 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8175 rtx op0 = *pop0, op1 = *pop1;
8176 enum machine_mode op_mode = GET_MODE (op0);
8177 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8178
8179 /* All of the unordered compare instructions only work on registers.
8180 The same is true of the XFmode compare instructions. The same is
8181 true of the fcomi compare instructions. */
8182
8183 if (!is_sse
8184 && (fpcmp_mode == CCFPUmode
8185 || op_mode == XFmode
8186 || op_mode == TFmode
8187 || ix86_use_fcomi_compare (code)))
8188 {
8189 op0 = force_reg (op_mode, op0);
8190 op1 = force_reg (op_mode, op1);
8191 }
8192 else
8193 {
8194 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8195 things around if they appear profitable, otherwise force op0
8196 into a register. */
8197
8198 if (standard_80387_constant_p (op0) == 0
8199 || (GET_CODE (op0) == MEM
8200 && ! (standard_80387_constant_p (op1) == 0
8201 || GET_CODE (op1) == MEM)))
8202 {
8203 rtx tmp;
8204 tmp = op0, op0 = op1, op1 = tmp;
8205 code = swap_condition (code);
8206 }
8207
8208 if (GET_CODE (op0) != REG)
8209 op0 = force_reg (op_mode, op0);
8210
8211 if (CONSTANT_P (op1))
8212 {
8213 if (standard_80387_constant_p (op1))
8214 op1 = force_reg (op_mode, op1);
8215 else
8216 op1 = validize_mem (force_const_mem (op_mode, op1));
8217 }
8218 }
8219
8220 /* Try to rearrange the comparison to make it cheaper. */
8221 if (ix86_fp_comparison_cost (code)
8222 > ix86_fp_comparison_cost (swap_condition (code))
8223 && (GET_CODE (op1) == REG || !no_new_pseudos))
8224 {
8225 rtx tmp;
8226 tmp = op0, op0 = op1, op1 = tmp;
8227 code = swap_condition (code);
8228 if (GET_CODE (op0) != REG)
8229 op0 = force_reg (op_mode, op0);
8230 }
8231
8232 *pop0 = op0;
8233 *pop1 = op1;
8234 return code;
8235 }
8236
8237 /* Convert comparison codes we use to represent FP comparison to integer
8238 code that will result in proper branch. Return UNKNOWN if no such code
8239 is available. */
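/* The mapping is to unsigned codes because fcomi, and fnstsw followed by
   sahf, place C0 and C3 into CF and ZF the way an unsigned integer compare
   would; see the flag table in ix86_fp_comparison_codes.  */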
8240 static enum rtx_code
8241 ix86_fp_compare_code_to_integer (code)
8242 enum rtx_code code;
8243 {
8244 switch (code)
8245 {
8246 case GT:
8247 return GTU;
8248 case GE:
8249 return GEU;
8250 case ORDERED:
8251 case UNORDERED:
8252 return code;
8253 break;
8254 case UNEQ:
8255 return EQ;
8256 break;
8257 case UNLT:
8258 return LTU;
8259 break;
8260 case UNLE:
8261 return LEU;
8262 break;
8263 case LTGT:
8264 return NE;
8265 break;
8266 default:
8267 return UNKNOWN;
8268 }
8269 }
8270
8271 /* Split comparison code CODE into comparisons we can do using branch
8272 instructions.  BYPASS_CODE is the comparison code for the branch that
8273 will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8274 is not required, its code is set to NIL.
8275 We never require more than two branches.  */
8276 static void
8277 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8278 enum rtx_code code, *bypass_code, *first_code, *second_code;
8279 {
8280 *first_code = code;
8281 *bypass_code = NIL;
8282 *second_code = NIL;
8283
8284 /* The fcomi comparison sets flags as follows:
8285
8286 cmp ZF PF CF
8287 > 0 0 0
8288 < 0 0 1
8289 = 1 0 0
8290 un 1 1 1 */
8291
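  /* For example, with IEEE math a plain LT cannot test CF alone, since CF
     is also set for unordered operands; it is therefore rewritten below as
     UNLT guarded by an UNORDERED bypass branch around it.  */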
8292 switch (code)
8293 {
8294 case GT: /* GTU - CF=0 & ZF=0 */
8295 case GE: /* GEU - CF=0 */
8296 case ORDERED: /* PF=0 */
8297 case UNORDERED: /* PF=1 */
8298 case UNEQ: /* EQ - ZF=1 */
8299 case UNLT: /* LTU - CF=1 */
8300 case UNLE: /* LEU - CF=1 | ZF=1 */
8301 case LTGT: /* EQ - ZF=0 */
8302 break;
8303 case LT: /* LTU - CF=1 - fails on unordered */
8304 *first_code = UNLT;
8305 *bypass_code = UNORDERED;
8306 break;
8307 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8308 *first_code = UNLE;
8309 *bypass_code = UNORDERED;
8310 break;
8311 case EQ: /* EQ - ZF=1 - fails on unordered */
8312 *first_code = UNEQ;
8313 *bypass_code = UNORDERED;
8314 break;
8315 case NE: /* NE - ZF=0 - fails on unordered */
8316 *first_code = LTGT;
8317 *second_code = UNORDERED;
8318 break;
8319 case UNGE: /* GEU - CF=0 - fails on unordered */
8320 *first_code = GE;
8321 *second_code = UNORDERED;
8322 break;
8323 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8324 *first_code = GT;
8325 *second_code = UNORDERED;
8326 break;
8327 default:
8328 abort ();
8329 }
8330 if (!TARGET_IEEE_FP)
8331 {
8332 *second_code = NIL;
8333 *bypass_code = NIL;
8334 }
8335 }
8336
8337 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8338 All of the following functions use the number of instructions as the cost metric.
8339 In the future this should be tweaked to compute bytes for optimize_size and
8340 take into account the performance of various instructions on various CPUs.  */
8341 static int
8342 ix86_fp_comparison_arithmetics_cost (code)
8343 enum rtx_code code;
8344 {
8345 if (!TARGET_IEEE_FP)
8346 return 4;
8347 /* The cost of code output by ix86_expand_fp_compare. */
8348 switch (code)
8349 {
8350 case UNLE:
8351 case UNLT:
8352 case LTGT:
8353 case GT:
8354 case GE:
8355 case UNORDERED:
8356 case ORDERED:
8357 case UNEQ:
8358 return 4;
8359 break;
8360 case LT:
8361 case NE:
8362 case EQ:
8363 case UNGE:
8364 return 5;
8365 break;
8366 case LE:
8367 case UNGT:
8368 return 6;
8369 break;
8370 default:
8371 abort ();
8372 }
8373 }
8374
8375 /* Return cost of comparison done using fcomi operation.
8376 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8377 static int
8378 ix86_fp_comparison_fcomi_cost (code)
8379 enum rtx_code code;
8380 {
8381 enum rtx_code bypass_code, first_code, second_code;
8382 /* Return an arbitrarily high cost when the instruction is not supported -
8383 this prevents gcc from using it.  */
8384 if (!TARGET_CMOVE)
8385 return 1024;
8386 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8387 return (bypass_code != NIL || second_code != NIL) + 2;
8388 }
8389
8390 /* Return cost of comparison done using sahf operation.
8391 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8392 static int
8393 ix86_fp_comparison_sahf_cost (code)
8394 enum rtx_code code;
8395 {
8396 enum rtx_code bypass_code, first_code, second_code;
8397 /* Return an arbitrarily high cost when the instruction is not preferred -
8398 this prevents gcc from using it.  */
8399 if (!TARGET_USE_SAHF && !optimize_size)
8400 return 1024;
8401 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8402 return (bypass_code != NIL || second_code != NIL) + 3;
8403 }
8404
8405 /* Compute cost of the comparison done using any method.
8406 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8407 static int
8408 ix86_fp_comparison_cost (code)
8409 enum rtx_code code;
8410 {
8411 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8412 int min;
8413
8414 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8415 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8416
8417 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8418 if (min > sahf_cost)
8419 min = sahf_cost;
8420 if (min > fcomi_cost)
8421 min = fcomi_cost;
8422 return min;
8423 }
8424
8425 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8426
8427 static rtx
8428 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8429 enum rtx_code code;
8430 rtx op0, op1, scratch;
8431 rtx *second_test;
8432 rtx *bypass_test;
8433 {
8434 enum machine_mode fpcmp_mode, intcmp_mode;
8435 rtx tmp, tmp2;
8436 int cost = ix86_fp_comparison_cost (code);
8437 enum rtx_code bypass_code, first_code, second_code;
8438
8439 fpcmp_mode = ix86_fp_compare_mode (code);
8440 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8441
8442 if (second_test)
8443 *second_test = NULL_RTX;
8444 if (bypass_test)
8445 *bypass_test = NULL_RTX;
8446
8447 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8448
8449 /* Do fcomi/sahf based test when profitable. */
8450 if ((bypass_code == NIL || bypass_test)
8451 && (second_code == NIL || second_test)
8452 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8453 {
8454 if (TARGET_CMOVE)
8455 {
8456 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8457 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8458 tmp);
8459 emit_insn (tmp);
8460 }
8461 else
8462 {
8463 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8464 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8465 if (!scratch)
8466 scratch = gen_reg_rtx (HImode);
8467 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8468 emit_insn (gen_x86_sahf_1 (scratch));
8469 }
8470
8471 /* The FP codes work out to act like unsigned. */
8472 intcmp_mode = fpcmp_mode;
8473 code = first_code;
8474 if (bypass_code != NIL)
8475 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8476 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8477 const0_rtx);
8478 if (second_code != NIL)
8479 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8480 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8481 const0_rtx);
8482 }
8483 else
8484 {
8485 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8486 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8487 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8488 if (!scratch)
8489 scratch = gen_reg_rtx (HImode);
8490 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8491
8492 /* In the unordered case, we have to check C2 for NaN's, which
8493 doesn't happen to work out to anything nice combination-wise.
8494 So do some bit twiddling on the value we've got in AH to come
8495 up with an appropriate set of condition codes. */
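	  /* After fnstsw, AH holds the FPU condition bits: C0 is 0x01,
	     C2 is 0x04 and C3 is 0x40.  The 0x45 masks used below thus
	     select C0|C2|C3, and 0x44 selects C2|C3.  */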
8496
8497 intcmp_mode = CCNOmode;
8498 switch (code)
8499 {
8500 case GT:
8501 case UNGT:
8502 if (code == GT || !TARGET_IEEE_FP)
8503 {
8504 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8505 code = EQ;
8506 }
8507 else
8508 {
8509 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8510 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8511 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8512 intcmp_mode = CCmode;
8513 code = GEU;
8514 }
8515 break;
8516 case LT:
8517 case UNLT:
8518 if (code == LT && TARGET_IEEE_FP)
8519 {
8520 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8521 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8522 intcmp_mode = CCmode;
8523 code = EQ;
8524 }
8525 else
8526 {
8527 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8528 code = NE;
8529 }
8530 break;
8531 case GE:
8532 case UNGE:
8533 if (code == GE || !TARGET_IEEE_FP)
8534 {
8535 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8536 code = EQ;
8537 }
8538 else
8539 {
8540 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8541 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8542 GEN_INT (0x01)));
8543 code = NE;
8544 }
8545 break;
8546 case LE:
8547 case UNLE:
8548 if (code == LE && TARGET_IEEE_FP)
8549 {
8550 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8551 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8552 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8553 intcmp_mode = CCmode;
8554 code = LTU;
8555 }
8556 else
8557 {
8558 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8559 code = NE;
8560 }
8561 break;
8562 case EQ:
8563 case UNEQ:
8564 if (code == EQ && TARGET_IEEE_FP)
8565 {
8566 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8567 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8568 intcmp_mode = CCmode;
8569 code = EQ;
8570 }
8571 else
8572 {
8573 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8574 code = NE;
8575 break;
8576 }
8577 break;
8578 case NE:
8579 case LTGT:
8580 if (code == NE && TARGET_IEEE_FP)
8581 {
8582 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8583 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8584 GEN_INT (0x40)));
8585 code = NE;
8586 }
8587 else
8588 {
8589 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8590 code = EQ;
8591 }
8592 break;
8593
8594 case UNORDERED:
8595 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8596 code = NE;
8597 break;
8598 case ORDERED:
8599 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8600 code = EQ;
8601 break;
8602
8603 default:
8604 abort ();
8605 }
8606 }
8607
8608 /* Return the test that should be put into the flags user, i.e.
8609 the bcc, scc, or cmov instruction. */
8610 return gen_rtx_fmt_ee (code, VOIDmode,
8611 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8612 const0_rtx);
8613 }
8614
8615 rtx
8616 ix86_expand_compare (code, second_test, bypass_test)
8617 enum rtx_code code;
8618 rtx *second_test, *bypass_test;
8619 {
8620 rtx op0, op1, ret;
8621 op0 = ix86_compare_op0;
8622 op1 = ix86_compare_op1;
8623
8624 if (second_test)
8625 *second_test = NULL_RTX;
8626 if (bypass_test)
8627 *bypass_test = NULL_RTX;
8628
8629 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8630 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8631 second_test, bypass_test);
8632 else
8633 ret = ix86_expand_int_compare (code, op0, op1);
8634
8635 return ret;
8636 }
8637
8638 /* Return true if the CODE will result in nontrivial jump sequence. */
8639 bool
8640 ix86_fp_jump_nontrivial_p (code)
8641 enum rtx_code code;
8642 {
8643 enum rtx_code bypass_code, first_code, second_code;
8644 if (!TARGET_CMOVE)
8645 return true;
8646 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8647 return bypass_code != NIL || second_code != NIL;
8648 }
8649
8650 void
8651 ix86_expand_branch (code, label)
8652 enum rtx_code code;
8653 rtx label;
8654 {
8655 rtx tmp;
8656
8657 switch (GET_MODE (ix86_compare_op0))
8658 {
8659 case QImode:
8660 case HImode:
8661 case SImode:
8662 simple:
8663 tmp = ix86_expand_compare (code, NULL, NULL);
8664 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8665 gen_rtx_LABEL_REF (VOIDmode, label),
8666 pc_rtx);
8667 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8668 return;
8669
8670 case SFmode:
8671 case DFmode:
8672 case XFmode:
8673 case TFmode:
8674 {
8675 rtvec vec;
8676 int use_fcomi;
8677 enum rtx_code bypass_code, first_code, second_code;
8678
8679 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8680 &ix86_compare_op1);
8681
8682 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8683
8684 /* Check whether we will use the natural sequence with one jump. If
8685 so, we can expand jump early. Otherwise delay expansion by
8686 creating compound insn to not confuse optimizers. */
8687 if (bypass_code == NIL && second_code == NIL
8688 && TARGET_CMOVE)
8689 {
8690 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8691 gen_rtx_LABEL_REF (VOIDmode, label),
8692 pc_rtx, NULL_RTX);
8693 }
8694 else
8695 {
8696 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8697 ix86_compare_op0, ix86_compare_op1);
8698 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8699 gen_rtx_LABEL_REF (VOIDmode, label),
8700 pc_rtx);
8701 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8702
8703 use_fcomi = ix86_use_fcomi_compare (code);
8704 vec = rtvec_alloc (3 + !use_fcomi);
8705 RTVEC_ELT (vec, 0) = tmp;
8706 RTVEC_ELT (vec, 1)
8707 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8708 RTVEC_ELT (vec, 2)
8709 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8710 if (! use_fcomi)
8711 RTVEC_ELT (vec, 3)
8712 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8713
8714 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8715 }
8716 return;
8717 }
8718
8719 case DImode:
8720 if (TARGET_64BIT)
8721 goto simple;
8722 /* Expand DImode branch into multiple compare+branch. */
8723 {
8724 rtx lo[2], hi[2], label2;
8725 enum rtx_code code1, code2, code3;
8726
8727 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8728 {
8729 tmp = ix86_compare_op0;
8730 ix86_compare_op0 = ix86_compare_op1;
8731 ix86_compare_op1 = tmp;
8732 code = swap_condition (code);
8733 }
8734 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8735 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8736
8737 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8738 avoid two branches. This costs one extra insn, so disable when
8739 optimizing for size. */
8740
8741 if ((code == EQ || code == NE)
8742 && (!optimize_size
8743 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8744 {
8745 rtx xor0, xor1;
8746
8747 xor1 = hi[0];
8748 if (hi[1] != const0_rtx)
8749 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8750 NULL_RTX, 0, OPTAB_WIDEN);
8751
8752 xor0 = lo[0];
8753 if (lo[1] != const0_rtx)
8754 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8755 NULL_RTX, 0, OPTAB_WIDEN);
8756
8757 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8758 NULL_RTX, 0, OPTAB_WIDEN);
8759
8760 ix86_compare_op0 = tmp;
8761 ix86_compare_op1 = const0_rtx;
8762 ix86_expand_branch (code, label);
8763 return;
8764 }
8765
8766 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8767 op1 is a constant and the low word is zero, then we can just
8768 examine the high word. */
8769
8770 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8771 switch (code)
8772 {
8773 case LT: case LTU: case GE: case GEU:
8774 ix86_compare_op0 = hi[0];
8775 ix86_compare_op1 = hi[1];
8776 ix86_expand_branch (code, label);
8777 return;
8778 default:
8779 break;
8780 }
8781
8782 /* Otherwise, we need two or three jumps. */
8783
8784 label2 = gen_label_rtx ();
8785
8786 code1 = code;
8787 code2 = swap_condition (code);
8788 code3 = unsigned_condition (code);
8789
8790 switch (code)
8791 {
8792 case LT: case GT: case LTU: case GTU:
8793 break;
8794
8795 case LE: code1 = LT; code2 = GT; break;
8796 case GE: code1 = GT; code2 = LT; break;
8797 case LEU: code1 = LTU; code2 = GTU; break;
8798 case GEU: code1 = GTU; code2 = LTU; break;
8799
8800 case EQ: code1 = NIL; code2 = NE; break;
8801 case NE: code2 = NIL; break;
8802
8803 default:
8804 abort ();
8805 }
8806
8807 /*
8808 * a < b =>
8809 * if (hi(a) < hi(b)) goto true;
8810 * if (hi(a) > hi(b)) goto false;
8811 * if (lo(a) < lo(b)) goto true;
8812 * false:
8813 */
8814
8815 ix86_compare_op0 = hi[0];
8816 ix86_compare_op1 = hi[1];
8817
8818 if (code1 != NIL)
8819 ix86_expand_branch (code1, label);
8820 if (code2 != NIL)
8821 ix86_expand_branch (code2, label2);
8822
8823 ix86_compare_op0 = lo[0];
8824 ix86_compare_op1 = lo[1];
8825 ix86_expand_branch (code3, label);
8826
8827 if (code2 != NIL)
8828 emit_label (label2);
8829 return;
8830 }
8831
8832 default:
8833 abort ();
8834 }
8835 }
8836
8837 /* Split branch based on floating point condition. */
8838 void
8839 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8840 enum rtx_code code;
8841 rtx op1, op2, target1, target2, tmp;
8842 {
8843 rtx second, bypass;
8844 rtx label = NULL_RTX;
8845 rtx condition;
8846 int bypass_probability = -1, second_probability = -1, probability = -1;
8847 rtx i;
8848
8849 if (target2 != pc_rtx)
8850 {
8851 rtx tmp = target2;
8852 code = reverse_condition_maybe_unordered (code);
8853 target2 = target1;
8854 target1 = tmp;
8855 }
8856
8857 condition = ix86_expand_fp_compare (code, op1, op2,
8858 tmp, &second, &bypass);
8859
8860 if (split_branch_probability >= 0)
8861 {
8862 /* Distribute the probabilities across the jumps.
8863 Assume that BYPASS and SECOND always test
8864 for UNORDERED.  */
8865 probability = split_branch_probability;
8866
8867 /* A value of 1 is low enough that the probability does not need
8868 to be updated.  Later we may run some experiments and see
8869 whether unordered values are more frequent in practice.  */
8870 if (bypass)
8871 bypass_probability = 1;
8872 if (second)
8873 second_probability = 1;
8874 }
8875 if (bypass != NULL_RTX)
8876 {
8877 label = gen_label_rtx ();
8878 i = emit_jump_insn (gen_rtx_SET
8879 (VOIDmode, pc_rtx,
8880 gen_rtx_IF_THEN_ELSE (VOIDmode,
8881 bypass,
8882 gen_rtx_LABEL_REF (VOIDmode,
8883 label),
8884 pc_rtx)));
8885 if (bypass_probability >= 0)
8886 REG_NOTES (i)
8887 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8888 GEN_INT (bypass_probability),
8889 REG_NOTES (i));
8890 }
8891 i = emit_jump_insn (gen_rtx_SET
8892 (VOIDmode, pc_rtx,
8893 gen_rtx_IF_THEN_ELSE (VOIDmode,
8894 condition, target1, target2)));
8895 if (probability >= 0)
8896 REG_NOTES (i)
8897 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8898 GEN_INT (probability),
8899 REG_NOTES (i));
8900 if (second != NULL_RTX)
8901 {
8902 i = emit_jump_insn (gen_rtx_SET
8903 (VOIDmode, pc_rtx,
8904 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8905 target2)));
8906 if (second_probability >= 0)
8907 REG_NOTES (i)
8908 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8909 GEN_INT (second_probability),
8910 REG_NOTES (i));
8911 }
8912 if (label != NULL_RTX)
8913 emit_label (label);
8914 }
8915
8916 int
8917 ix86_expand_setcc (code, dest)
8918 enum rtx_code code;
8919 rtx dest;
8920 {
8921 rtx ret, tmp, tmpreg;
8922 rtx second_test, bypass_test;
8923
8924 if (GET_MODE (ix86_compare_op0) == DImode
8925 && !TARGET_64BIT)
8926 return 0; /* FAIL */
8927
8928 if (GET_MODE (dest) != QImode)
8929 abort ();
8930
8931 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8932 PUT_MODE (ret, QImode);
8933
8934 tmp = dest;
8935 tmpreg = dest;
8936
8937 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8938 if (bypass_test || second_test)
8939 {
8940 rtx test = second_test;
8941 int bypass = 0;
8942 rtx tmp2 = gen_reg_rtx (QImode);
8943 if (bypass_test)
8944 {
8945 if (second_test)
8946 abort ();
8947 test = bypass_test;
8948 bypass = 1;
8949 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8950 }
8951 PUT_MODE (test, QImode);
8952 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8953
8954 if (bypass)
8955 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8956 else
8957 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8958 }
8959
8960 return 1; /* DONE */
8961 }
8962
8963 int
8964 ix86_expand_int_movcc (operands)
8965 rtx operands[];
8966 {
8967 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8968 rtx compare_seq, compare_op;
8969 rtx second_test, bypass_test;
8970 enum machine_mode mode = GET_MODE (operands[0]);
8971
8972 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
8973 When the comparison is done with an immediate, we can convert it to LTU or
8974 GEU by altering the integer.  */
8975
8976 if ((code == LEU || code == GTU)
8977 && GET_CODE (ix86_compare_op1) == CONST_INT
8978 && mode != HImode
8979 && INTVAL (ix86_compare_op1) != -1
8980 /* For x86-64, the immediate field in the instruction is 32-bit
8981 signed, so we can't increment a DImode value above 0x7fffffff. */
8982 && (!TARGET_64BIT
8983 || GET_MODE (ix86_compare_op0) != DImode
8984 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8985 && GET_CODE (operands[2]) == CONST_INT
8986 && GET_CODE (operands[3]) == CONST_INT)
8987 {
8988 if (code == LEU)
8989 code = LTU;
8990 else
8991 code = GEU;
8992 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8993 GET_MODE (ix86_compare_op0));
8994 }
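      /* For example, (x <=u 5) becomes (x <u 6) and (x >u 5) becomes
	 (x >=u 6), which the sbb-based sequences below handle directly.  */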
8995
8996 start_sequence ();
8997 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8998 compare_seq = get_insns ();
8999 end_sequence ();
9000
9001 compare_code = GET_CODE (compare_op);
9002
9003 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9004 HImode insns, we'd be swallowed in word prefix ops. */
9005
9006 if (mode != HImode
9007 && (mode != DImode || TARGET_64BIT)
9008 && GET_CODE (operands[2]) == CONST_INT
9009 && GET_CODE (operands[3]) == CONST_INT)
9010 {
9011 rtx out = operands[0];
9012 HOST_WIDE_INT ct = INTVAL (operands[2]);
9013 HOST_WIDE_INT cf = INTVAL (operands[3]);
9014 HOST_WIDE_INT diff;
9015
9016 if ((compare_code == LTU || compare_code == GEU)
9017 && !second_test && !bypass_test)
9018 {
9019 /* Detect overlap between destination and compare sources. */
9020 rtx tmp = out;
9021
9022 /* To simplify rest of code, restrict to the GEU case. */
9023 if (compare_code == LTU)
9024 {
9025 HOST_WIDE_INT tmp = ct;
9026 ct = cf;
9027 cf = tmp;
9028 compare_code = reverse_condition (compare_code);
9029 code = reverse_condition (code);
9030 }
9031 diff = ct - cf;
9032
9033 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9034 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9035 tmp = gen_reg_rtx (mode);
9036
9037 emit_insn (compare_seq);
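	  /* The x86_mov?icc_0_m1 patterns emit "sbb reg,reg", leaving -1
	     in the register when the carry flag is set and 0 otherwise.  */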
9038 if (mode == DImode)
9039 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9040 else
9041 emit_insn (gen_x86_movsicc_0_m1 (tmp));
9042
9043 if (diff == 1)
9044 {
9045 /*
9046 * cmpl op0,op1
9047 * sbbl dest,dest
9048 * [addl dest, ct]
9049 *
9050 * Size 5 - 8.
9051 */
9052 if (ct)
9053 tmp = expand_simple_binop (mode, PLUS,
9054 tmp, GEN_INT (ct),
9055 tmp, 1, OPTAB_DIRECT);
9056 }
9057 else if (cf == -1)
9058 {
9059 /*
9060 * cmpl op0,op1
9061 * sbbl dest,dest
9062 * orl $ct, dest
9063 *
9064 * Size 8.
9065 */
9066 tmp = expand_simple_binop (mode, IOR,
9067 tmp, GEN_INT (ct),
9068 tmp, 1, OPTAB_DIRECT);
9069 }
9070 else if (diff == -1 && ct)
9071 {
9072 /*
9073 * cmpl op0,op1
9074 * sbbl dest,dest
9075 * notl dest
9076 * [addl dest, cf]
9077 *
9078 * Size 8 - 11.
9079 */
9080 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9081 if (cf)
9082 tmp = expand_simple_binop (mode, PLUS,
9083 tmp, GEN_INT (cf),
9084 tmp, 1, OPTAB_DIRECT);
9085 }
9086 else
9087 {
9088 /*
9089 * cmpl op0,op1
9090 * sbbl dest,dest
9091 * [notl dest]
9092 * andl cf - ct, dest
9093 * [addl dest, ct]
9094 *
9095 * Size 8 - 11.
9096 */
9097
9098 if (cf == 0)
9099 {
9100 cf = ct;
9101 ct = 0;
9102 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9103 }
9104
9105 tmp = expand_simple_binop (mode, AND,
9106 tmp,
9107 gen_int_mode (cf - ct, mode),
9108 tmp, 1, OPTAB_DIRECT);
9109 if (ct)
9110 tmp = expand_simple_binop (mode, PLUS,
9111 tmp, GEN_INT (ct),
9112 tmp, 1, OPTAB_DIRECT);
9113 }
9114
9115 if (tmp != out)
9116 emit_move_insn (out, tmp);
9117
9118 return 1; /* DONE */
9119 }
9120
9121 diff = ct - cf;
9122 if (diff < 0)
9123 {
9124 HOST_WIDE_INT tmp;
9125 tmp = ct, ct = cf, cf = tmp;
9126 diff = -diff;
9127 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9128 {
9129 /* We may be reversing an unordered compare to a normal compare, which
9130 is not valid in general (we may convert a non-trapping condition
9131 to a trapping one); however, on i386 we currently emit all
9132 comparisons unordered.  */
9133 compare_code = reverse_condition_maybe_unordered (compare_code);
9134 code = reverse_condition_maybe_unordered (code);
9135 }
9136 else
9137 {
9138 compare_code = reverse_condition (compare_code);
9139 code = reverse_condition (code);
9140 }
9141 }
9142
9143 compare_code = NIL;
9144 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9145 && GET_CODE (ix86_compare_op1) == CONST_INT)
9146 {
9147 if (ix86_compare_op1 == const0_rtx
9148 && (code == LT || code == GE))
9149 compare_code = code;
9150 else if (ix86_compare_op1 == constm1_rtx)
9151 {
9152 if (code == LE)
9153 compare_code = LT;
9154 else if (code == GT)
9155 compare_code = GE;
9156 }
9157 }
9158
9159 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9160 if (compare_code != NIL
9161 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9162 && (cf == -1 || ct == -1))
9163 {
9164 /* If lea code below could be used, only optimize
9165 if it results in a 2 insn sequence. */
9166
9167 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9168 || diff == 3 || diff == 5 || diff == 9)
9169 || (compare_code == LT && ct == -1)
9170 || (compare_code == GE && cf == -1))
9171 {
9172 /*
9173 * notl op1 (if necessary)
9174 * sarl $31, op1
9175 * orl cf, op1
9176 */
9177 if (ct != -1)
9178 {
9179 cf = ct;
9180 ct = -1;
9181 code = reverse_condition (code);
9182 }
9183
9184 out = emit_store_flag (out, code, ix86_compare_op0,
9185 ix86_compare_op1, VOIDmode, 0, -1);
9186
9187 out = expand_simple_binop (mode, IOR,
9188 out, GEN_INT (cf),
9189 out, 1, OPTAB_DIRECT);
9190 if (out != operands[0])
9191 emit_move_insn (operands[0], out);
9192
9193 return 1; /* DONE */
9194 }
9195 }
9196
9197 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9198 || diff == 3 || diff == 5 || diff == 9)
9199 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9200 {
9201 /*
9202 * xorl dest,dest
9203 * cmpl op1,op2
9204 * setcc dest
9205 * lea cf(dest*(ct-cf)),dest
9206 *
9207 * Size 14.
9208 *
9209 * This also catches the degenerate setcc-only case.
9210 */
9211
9212 rtx tmp;
9213 int nops;
9214
9215 out = emit_store_flag (out, code, ix86_compare_op0,
9216 ix86_compare_op1, VOIDmode, 0, 1);
9217
9218 nops = 0;
9219 /* On x86_64 the lea instruction operates on Pmode, so we need
9220 to get the arithmetic done in the proper mode to match.  */
9221 if (diff == 1)
9222 tmp = out;
9223 else
9224 {
9225 rtx out1;
9226 out1 = out;
9227 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9228 nops++;
9229 if (diff & 1)
9230 {
9231 tmp = gen_rtx_PLUS (mode, tmp, out1);
9232 nops++;
9233 }
9234 }
9235 if (cf != 0)
9236 {
9237 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9238 nops++;
9239 }
9240 if (tmp != out
9241 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9242 {
9243 if (nops == 1)
9244 out = force_operand (tmp, out);
9245 else
9246 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9247 }
9248 if (out != operands[0])
9249 emit_move_insn (operands[0], copy_rtx (out));
9250
9251 return 1; /* DONE */
9252 }
9253
9254 /*
9255 * General case: Jumpful:
9256 * xorl dest,dest cmpl op1, op2
9257 * cmpl op1, op2 movl ct, dest
9258 * setcc dest jcc 1f
9259 * decl dest movl cf, dest
9260 * andl (cf-ct),dest 1:
9261 * addl ct,dest
9262 *
9263 * Size 20. Size 14.
9264 *
9265 * This is reasonably steep, but branch mispredict costs are
9266 * high on modern cpus, so consider failing only if optimizing
9267 * for space.
9268 *
9269 * %%% Parameterize branch_cost on the tuning architecture, then
9270 * use that. The 80386 couldn't care less about mispredicts.
9271 */
9272
9273 if (!optimize_size && !TARGET_CMOVE)
9274 {
9275 if (cf == 0)
9276 {
9277 cf = ct;
9278 ct = 0;
9279 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9280 /* We may be reversing an unordered compare to a normal compare,
9281 which is not valid in general (we may convert a non-trapping
9282 condition to a trapping one); however, on i386 we currently
9283 emit all comparisons unordered.  */
9284 code = reverse_condition_maybe_unordered (code);
9285 else
9286 {
9287 code = reverse_condition (code);
9288 if (compare_code != NIL)
9289 compare_code = reverse_condition (compare_code);
9290 }
9291 }
9292
9293 if (compare_code != NIL)
9294 {
9295 /* notl op1 (if needed)
9296 sarl $31, op1
9297 andl (cf-ct), op1
9298 addl ct, op1
9299
9300 For x < 0 (resp. x <= -1) there will be no notl,
9301 so if possible swap the constants to get rid of the
9302 complement.
9303 True/false will be -1/0 while code below (store flag
9304 followed by decrement) is 0/-1, so the constants need
9305 to be exchanged once more. */
9306
9307 if (compare_code == GE || !cf)
9308 {
9309 code = reverse_condition (code);
9310 compare_code = LT;
9311 }
9312 else
9313 {
9314 HOST_WIDE_INT tmp = cf;
9315 cf = ct;
9316 ct = tmp;
9317 }
9318
9319 out = emit_store_flag (out, code, ix86_compare_op0,
9320 ix86_compare_op1, VOIDmode, 0, -1);
9321 }
9322 else
9323 {
9324 out = emit_store_flag (out, code, ix86_compare_op0,
9325 ix86_compare_op1, VOIDmode, 0, 1);
9326
9327 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9328 out, 1, OPTAB_DIRECT);
9329 }
9330
9331 out = expand_simple_binop (mode, AND, out,
9332 gen_int_mode (cf - ct, mode),
9333 out, 1, OPTAB_DIRECT);
9334 if (ct)
9335 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9336 out, 1, OPTAB_DIRECT);
9337 if (out != operands[0])
9338 emit_move_insn (operands[0], out);
9339
9340 return 1; /* DONE */
9341 }
9342 }
9343
9344 if (!TARGET_CMOVE)
9345 {
9346 /* Try a few things more with specific constants and a variable. */
9347
9348 optab op;
9349 rtx var, orig_out, out, tmp;
9350
9351 if (optimize_size)
9352 return 0; /* FAIL */
9353
9354 /* If one of the two operands is an interesting constant, load a
9355 constant with the above and mask it in with a logical operation. */
9356
9357 if (GET_CODE (operands[2]) == CONST_INT)
9358 {
9359 var = operands[3];
9360 if (INTVAL (operands[2]) == 0)
9361 operands[3] = constm1_rtx, op = and_optab;
9362 else if (INTVAL (operands[2]) == -1)
9363 operands[3] = const0_rtx, op = ior_optab;
9364 else
9365 return 0; /* FAIL */
9366 }
9367 else if (GET_CODE (operands[3]) == CONST_INT)
9368 {
9369 var = operands[2];
9370 if (INTVAL (operands[3]) == 0)
9371 operands[2] = constm1_rtx, op = and_optab;
9372 else if (INTVAL (operands[3]) == -1)
9373 operands[2] = const0_rtx, op = ior_optab;
9374 else
9375 return 0; /* FAIL */
9376 }
9377 else
9378 return 0; /* FAIL */
9379
9380 orig_out = operands[0];
9381 tmp = gen_reg_rtx (mode);
9382 operands[0] = tmp;
9383
9384 /* Recurse to get the constant loaded. */
9385 if (ix86_expand_int_movcc (operands) == 0)
9386 return 0; /* FAIL */
9387
9388 /* Mask in the interesting variable. */
9389 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9390 OPTAB_WIDEN);
9391 if (out != orig_out)
9392 emit_move_insn (orig_out, out);
9393
9394 return 1; /* DONE */
9395 }
9396
9397 /*
9398 * For comparison with above,
9399 *
9400 * movl cf,dest
9401 * movl ct,tmp
9402 * cmpl op1,op2
9403 * cmovcc tmp,dest
9404 *
9405 * Size 15.
9406 */
9407
9408 if (! nonimmediate_operand (operands[2], mode))
9409 operands[2] = force_reg (mode, operands[2]);
9410 if (! nonimmediate_operand (operands[3], mode))
9411 operands[3] = force_reg (mode, operands[3]);
9412
9413 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9414 {
9415 rtx tmp = gen_reg_rtx (mode);
9416 emit_move_insn (tmp, operands[3]);
9417 operands[3] = tmp;
9418 }
9419 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9420 {
9421 rtx tmp = gen_reg_rtx (mode);
9422 emit_move_insn (tmp, operands[2]);
9423 operands[2] = tmp;
9424 }
9425 if (! register_operand (operands[2], VOIDmode)
9426 && ! register_operand (operands[3], VOIDmode))
9427 operands[2] = force_reg (mode, operands[2]);
9428
9429 emit_insn (compare_seq);
9430 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9431 gen_rtx_IF_THEN_ELSE (mode,
9432 compare_op, operands[2],
9433 operands[3])));
9434 if (bypass_test)
9435 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9436 gen_rtx_IF_THEN_ELSE (mode,
9437 bypass_test,
9438 operands[3],
9439 operands[0])));
9440 if (second_test)
9441 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9442 gen_rtx_IF_THEN_ELSE (mode,
9443 second_test,
9444 operands[2],
9445 operands[0])));
9446
9447 return 1; /* DONE */
9448 }
9449
9450 int
9451 ix86_expand_fp_movcc (operands)
9452 rtx operands[];
9453 {
9454 enum rtx_code code;
9455 rtx tmp;
9456 rtx compare_op, second_test, bypass_test;
9457
9458 /* For SF/DFmode conditional moves based on comparisons
9459 in same mode, we may want to use SSE min/max instructions. */
9460 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9461 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9462 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9463 /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9464 && (!TARGET_IEEE_FP
9465 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9466 /* We may be called from the post-reload splitter. */
9467 && (!REG_P (operands[0])
9468 || SSE_REG_P (operands[0])
9469 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9470 {
9471 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9472 code = GET_CODE (operands[1]);
9473
9474 /* See if we have (cross) match between comparison operands and
9475 conditional move operands. */
9476 if (rtx_equal_p (operands[2], op1))
9477 {
9478 rtx tmp = op0;
9479 op0 = op1;
9480 op1 = tmp;
9481 code = reverse_condition_maybe_unordered (code);
9482 }
9483 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9484 {
9485 /* Check for min operation. */
9486 if (code == LT)
9487 {
9488 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9489 if (memory_operand (op0, VOIDmode))
9490 op0 = force_reg (GET_MODE (operands[0]), op0);
9491 if (GET_MODE (operands[0]) == SFmode)
9492 emit_insn (gen_minsf3 (operands[0], op0, op1));
9493 else
9494 emit_insn (gen_mindf3 (operands[0], op0, op1));
9495 return 1;
9496 }
9497 /* Check for max operation. */
9498 if (code == GT)
9499 {
9500 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9501 if (memory_operand (op0, VOIDmode))
9502 op0 = force_reg (GET_MODE (operands[0]), op0);
9503 if (GET_MODE (operands[0]) == SFmode)
9504 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9505 else
9506 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9507 return 1;
9508 }
9509 }
9510 /* Arrange for the condition to be an sse_comparison_operator.  When we
9511 are in non-IEEE mode, try to canonicalize the destination operand
9512 to be first in the comparison - this helps reload avoid extra
9513 moves.  */
9514 if (!sse_comparison_operator (operands[1], VOIDmode)
9515 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9516 {
9517 rtx tmp = ix86_compare_op0;
9518 ix86_compare_op0 = ix86_compare_op1;
9519 ix86_compare_op1 = tmp;
9520 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9521 VOIDmode, ix86_compare_op0,
9522 ix86_compare_op1);
9523 }
9524 /* Similarly, try to arrange for the result to be the first operand of the
9525 conditional move.  We also don't support the NE comparison on SSE, so try
9526 to avoid it.  */
9527 if ((rtx_equal_p (operands[0], operands[3])
9528 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9529 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9530 {
9531 rtx tmp = operands[2];
9532 operands[2] = operands[3];
9533 operands[3] = tmp;
9534 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9535 (GET_CODE (operands[1])),
9536 VOIDmode, ix86_compare_op0,
9537 ix86_compare_op1);
9538 }
9539 if (GET_MODE (operands[0]) == SFmode)
9540 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9541 operands[2], operands[3],
9542 ix86_compare_op0, ix86_compare_op1));
9543 else
9544 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9545 operands[2], operands[3],
9546 ix86_compare_op0, ix86_compare_op1));
9547 return 1;
9548 }
9549
9550 /* The floating point conditional move instructions don't directly
9551 support conditions resulting from a signed integer comparison. */
9552
9553 code = GET_CODE (operands[1]);
9554 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9555
9556 /* The floating point conditional move instructions don't directly
9557 support signed integer comparisons. */
9558
9559 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9560 {
9561 if (second_test != NULL || bypass_test != NULL)
9562 abort ();
9563 tmp = gen_reg_rtx (QImode);
9564 ix86_expand_setcc (code, tmp);
9565 code = NE;
9566 ix86_compare_op0 = tmp;
9567 ix86_compare_op1 = const0_rtx;
9568 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9569 }
9570 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9571 {
9572 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9573 emit_move_insn (tmp, operands[3]);
9574 operands[3] = tmp;
9575 }
9576 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9577 {
9578 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9579 emit_move_insn (tmp, operands[2]);
9580 operands[2] = tmp;
9581 }
9582
9583 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9584 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9585 compare_op,
9586 operands[2],
9587 operands[3])));
9588 if (bypass_test)
9589 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9590 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9591 bypass_test,
9592 operands[3],
9593 operands[0])));
9594 if (second_test)
9595 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9596 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9597 second_test,
9598 operands[2],
9599 operands[0])));
9600
9601 return 1;
9602 }
9603
9604 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9605 works for floating point parameters and non-offsettable memories.
9606 For pushes, it returns just stack offsets; the values will be saved
9607 in the right order. Maximally three parts are generated. */
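/* For example, on ia32 a DFmode operand yields two SImode parts and an
   XFmode or TFmode operand yields three, while on x86-64 an XFmode or
   TFmode operand yields a DImode part followed by an SImode part.  */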
9608
9609 static int
9610 ix86_split_to_parts (operand, parts, mode)
9611 rtx operand;
9612 rtx *parts;
9613 enum machine_mode mode;
9614 {
9615 int size;
9616
9617 if (!TARGET_64BIT)
9618 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9619 else
9620 size = (GET_MODE_SIZE (mode) + 4) / 8;
9621
9622 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9623 abort ();
9624 if (size < 2 || size > 3)
9625 abort ();
9626
9627 /* Optimize constant pool references to immediates. This is used by fp
9628 moves, which force all constants to memory to allow combining. */
9629 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9630 {
9631 rtx tmp = maybe_get_pool_constant (operand);
9632 if (tmp)
9633 operand = tmp;
9634 }
9635
9636 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9637 {
9638 /* The only non-offsettable memories we handle are pushes. */
9639 if (! push_operand (operand, VOIDmode))
9640 abort ();
9641
9642 operand = copy_rtx (operand);
9643 PUT_MODE (operand, Pmode);
9644 parts[0] = parts[1] = parts[2] = operand;
9645 }
9646 else if (!TARGET_64BIT)
9647 {
9648 if (mode == DImode)
9649 split_di (&operand, 1, &parts[0], &parts[1]);
9650 else
9651 {
9652 if (REG_P (operand))
9653 {
9654 if (!reload_completed)
9655 abort ();
9656 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9657 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9658 if (size == 3)
9659 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9660 }
9661 else if (offsettable_memref_p (operand))
9662 {
9663 operand = adjust_address (operand, SImode, 0);
9664 parts[0] = operand;
9665 parts[1] = adjust_address (operand, SImode, 4);
9666 if (size == 3)
9667 parts[2] = adjust_address (operand, SImode, 8);
9668 }
9669 else if (GET_CODE (operand) == CONST_DOUBLE)
9670 {
9671 REAL_VALUE_TYPE r;
9672 long l[4];
9673
9674 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9675 switch (mode)
9676 {
9677 case XFmode:
9678 case TFmode:
9679 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9680 parts[2] = gen_int_mode (l[2], SImode);
9681 break;
9682 case DFmode:
9683 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9684 break;
9685 default:
9686 abort ();
9687 }
9688 parts[1] = gen_int_mode (l[1], SImode);
9689 parts[0] = gen_int_mode (l[0], SImode);
9690 }
9691 else
9692 abort ();
9693 }
9694 }
9695 else
9696 {
9697 if (mode == TImode)
9698 split_ti (&operand, 1, &parts[0], &parts[1]);
9699 if (mode == XFmode || mode == TFmode)
9700 {
9701 if (REG_P (operand))
9702 {
9703 if (!reload_completed)
9704 abort ();
9705 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9706 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9707 }
9708 else if (offsettable_memref_p (operand))
9709 {
9710 operand = adjust_address (operand, DImode, 0);
9711 parts[0] = operand;
9712 parts[1] = adjust_address (operand, SImode, 8);
9713 }
9714 else if (GET_CODE (operand) == CONST_DOUBLE)
9715 {
9716 REAL_VALUE_TYPE r;
9717 long l[3];
9718
9719 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9720 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9721 /* Do not use shift by 32 to avoid warning on 32bit systems. */
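/* (l[1] << 31) << 1 builds l[1] << 32 without writing a shift count of 32,
   which would draw a warning when HOST_WIDE_INT is only 32 bits wide even
   though the branch is guarded and never taken in that case; likewise
   ((HOST_WIDE_INT) 2 << 31) - 1 spells the low 32-bit mask without a
   shift count of 32.  */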
9722 if (HOST_BITS_PER_WIDE_INT >= 64)
9723 parts[0]
9724 = gen_int_mode
9725 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9726 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9727 DImode);
9728 else
9729 parts[0] = immed_double_const (l[0], l[1], DImode);
9730 parts[1] = gen_int_mode (l[2], SImode);
9731 }
9732 else
9733 abort ();
9734 }
9735 }
9736
9737 return size;
9738 }
9739
9740 /* Emit insns to perform a move or push of DI, DF, and XF values.
9741 All required insns are emitted directly by this function.
9742 Operands 2-4 receive the destination parts in the correct order;
9743 operands 5-7 receive the corresponding source parts. */
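/* For example, a 32-bit DImode register-to-memory move comes out as two
   SImode part moves and an XFmode move as three; the order of the part
   moves is chosen below so that no source part is clobbered before it is
   read.  */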
9744
9745 void
9746 ix86_split_long_move (operands)
9747 rtx operands[];
9748 {
9749 rtx part[2][3];
9750 int nparts;
9751 int push = 0;
9752 int collisions = 0;
9753 enum machine_mode mode = GET_MODE (operands[0]);
9754
9755 /* The DFmode expanders may ask us to move a double.
9756 For a 64-bit target this is a single move. By hiding that fact
9757 here we simplify the i386.md splitters. */
9758 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9759 {
9760 /* Optimize constant pool references to immediates. This is used by
9761 fp moves, which force all constants to memory to allow combining. */
9762
9763 if (GET_CODE (operands[1]) == MEM
9764 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9765 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9766 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9767 if (push_operand (operands[0], VOIDmode))
9768 {
9769 operands[0] = copy_rtx (operands[0]);
9770 PUT_MODE (operands[0], Pmode);
9771 }
9772 else
9773 operands[0] = gen_lowpart (DImode, operands[0]);
9774 operands[1] = gen_lowpart (DImode, operands[1]);
9775 emit_move_insn (operands[0], operands[1]);
9776 return;
9777 }
9778
9779 /* The only non-offsettable memory we handle is a push. */
9780 if (push_operand (operands[0], VOIDmode))
9781 push = 1;
9782 else if (GET_CODE (operands[0]) == MEM
9783 && ! offsettable_memref_p (operands[0]))
9784 abort ();
9785
9786 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9787 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9788
9789 /* When emitting a push, take care of source operands that live on the stack. */
9790 if (push && GET_CODE (operands[1]) == MEM
9791 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9792 {
9793 if (nparts == 3)
9794 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9795 XEXP (part[1][2], 0));
9796 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9797 XEXP (part[1][1], 0));
9798 }
9799
9800 /* We need to do the copy in the right order in case an address register
9801 of the source overlaps the destination. */
9802 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9803 {
9804 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9805 collisions++;
9806 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9807 collisions++;
9808 if (nparts == 3
9809 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9810 collisions++;
9811
9812 /* Collision in the middle part can be handled by reordering. */
9813 if (collisions == 1 && nparts == 3
9814 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9815 {
9816 rtx tmp;
9817 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9818 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9819 }
9820
9821 /* If there are more collisions, we can't handle it by reordering.
9822 Do an lea to the last part and use only one colliding move. */
9823 else if (collisions > 1)
9824 {
9825 collisions = 1;
9826 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9827 XEXP (part[1][0], 0)));
9828 part[1][0] = change_address (part[1][0],
9829 TARGET_64BIT ? DImode : SImode,
9830 part[0][nparts - 1]);
9831 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9832 if (nparts == 3)
9833 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9834 }
9835 }
9836
9837 if (push)
9838 {
9839 if (!TARGET_64BIT)
9840 {
9841 if (nparts == 3)
9842 {
9843 /* We use only the first 12 bytes of the TFmode value, but for pushing
9844 we are required to adjust the stack as if we were pushing a real
9845 16-byte value. */
9846 if (mode == TFmode && !TARGET_64BIT)
9847 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9848 GEN_INT (-4)));
9849 emit_move_insn (part[0][2], part[1][2]);
9850 }
9851 }
9852 else
9853 {
9854 /* In 64-bit mode we don't have a 32-bit push available. If the operand
9855 is a register, that is fine - we will just use the larger counterpart.
9856 We also retype the memory references - these come from an attempt to
9857 avoid a REX prefix when moving the second half of a TFmode value. */
9858 if (GET_MODE (part[1][1]) == SImode)
9859 {
9860 if (GET_CODE (part[1][1]) == MEM)
9861 part[1][1] = adjust_address (part[1][1], DImode, 0);
9862 else if (REG_P (part[1][1]))
9863 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9864 else
9865 abort ();
9866 if (GET_MODE (part[1][0]) == SImode)
9867 part[1][0] = part[1][1];
9868 }
9869 }
9870 emit_move_insn (part[0][1], part[1][1]);
9871 emit_move_insn (part[0][0], part[1][0]);
9872 return;
9873 }
9874
9875 /* Choose the correct order so we do not overwrite the source before it is copied. */
9876 if ((REG_P (part[0][0])
9877 && REG_P (part[1][1])
9878 && (REGNO (part[0][0]) == REGNO (part[1][1])
9879 || (nparts == 3
9880 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9881 || (collisions > 0
9882 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9883 {
9884 if (nparts == 3)
9885 {
9886 operands[2] = part[0][2];
9887 operands[3] = part[0][1];
9888 operands[4] = part[0][0];
9889 operands[5] = part[1][2];
9890 operands[6] = part[1][1];
9891 operands[7] = part[1][0];
9892 }
9893 else
9894 {
9895 operands[2] = part[0][1];
9896 operands[3] = part[0][0];
9897 operands[5] = part[1][1];
9898 operands[6] = part[1][0];
9899 }
9900 }
9901 else
9902 {
9903 if (nparts == 3)
9904 {
9905 operands[2] = part[0][0];
9906 operands[3] = part[0][1];
9907 operands[4] = part[0][2];
9908 operands[5] = part[1][0];
9909 operands[6] = part[1][1];
9910 operands[7] = part[1][2];
9911 }
9912 else
9913 {
9914 operands[2] = part[0][0];
9915 operands[3] = part[0][1];
9916 operands[5] = part[1][0];
9917 operands[6] = part[1][1];
9918 }
9919 }
9920 emit_move_insn (operands[2], operands[5]);
9921 emit_move_insn (operands[3], operands[6]);
9922 if (nparts == 3)
9923 emit_move_insn (operands[4], operands[7]);
9924
9925 return;
9926 }
9927
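/* Split a DImode left shift into SImode operations on the low and high
   halves.  For a constant count of 32 or more the low word simply becomes
   the new high word (shifted left by count - 32) and the low word is
   cleared; e.g. x << 40 becomes high = low << 8, low = 0.  For a variable
   count we emit shld/shl and then a conditional fix-up (x86_shift_adj_*)
   for counts of 32 or more, since the hardware shifts use only the low
   five bits of the count.  */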
9928 void
9929 ix86_split_ashldi (operands, scratch)
9930 rtx *operands, scratch;
9931 {
9932 rtx low[2], high[2];
9933 int count;
9934
9935 if (GET_CODE (operands[2]) == CONST_INT)
9936 {
9937 split_di (operands, 2, low, high);
9938 count = INTVAL (operands[2]) & 63;
9939
9940 if (count >= 32)
9941 {
9942 emit_move_insn (high[0], low[1]);
9943 emit_move_insn (low[0], const0_rtx);
9944
9945 if (count > 32)
9946 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9947 }
9948 else
9949 {
9950 if (!rtx_equal_p (operands[0], operands[1]))
9951 emit_move_insn (operands[0], operands[1]);
9952 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9953 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9954 }
9955 }
9956 else
9957 {
9958 if (!rtx_equal_p (operands[0], operands[1]))
9959 emit_move_insn (operands[0], operands[1]);
9960
9961 split_di (operands, 1, low, high);
9962
9963 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9964 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9965
9966 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9967 {
9968 if (! no_new_pseudos)
9969 scratch = force_reg (SImode, const0_rtx);
9970 else
9971 emit_move_insn (scratch, const0_rtx);
9972
9973 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9974 scratch));
9975 }
9976 else
9977 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9978 }
9979 }
9980
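/* Split a DImode arithmetic right shift into SImode operations.  For a
   constant count of 32 or more the old high word becomes the new low word
   (shifted right by count - 32) and the new high word is filled with
   copies of the sign bit via an arithmetic shift by 31.  Variable counts
   use shrd/sar plus a conditional fix-up, as in ix86_split_ashldi above.  */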
9981 void
9982 ix86_split_ashrdi (operands, scratch)
9983 rtx *operands, scratch;
9984 {
9985 rtx low[2], high[2];
9986 int count;
9987
9988 if (GET_CODE (operands[2]) == CONST_INT)
9989 {
9990 split_di (operands, 2, low, high);
9991 count = INTVAL (operands[2]) & 63;
9992
9993 if (count >= 32)
9994 {
9995 emit_move_insn (low[0], high[1]);
9996
9997 if (! reload_completed)
9998 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9999 else
10000 {
10001 emit_move_insn (high[0], low[0]);
10002 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10003 }
10004
10005 if (count > 32)
10006 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10007 }
10008 else
10009 {
10010 if (!rtx_equal_p (operands[0], operands[1]))
10011 emit_move_insn (operands[0], operands[1]);
10012 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10013 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10014 }
10015 }
10016 else
10017 {
10018 if (!rtx_equal_p (operands[0], operands[1]))
10019 emit_move_insn (operands[0], operands[1]);
10020
10021 split_di (operands, 1, low, high);
10022
10023 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10024 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10025
10026 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10027 {
10028 if (! no_new_pseudos)
10029 scratch = gen_reg_rtx (SImode);
10030 emit_move_insn (scratch, high[0]);
10031 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10032 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10033 scratch));
10034 }
10035 else
10036 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10037 }
10038 }
10039
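/* Split a DImode logical right shift into SImode operations.  This mirrors
   ix86_split_ashrdi except that the vacated high word is cleared instead
   of being filled with the sign bit.  */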
10040 void
10041 ix86_split_lshrdi (operands, scratch)
10042 rtx *operands, scratch;
10043 {
10044 rtx low[2], high[2];
10045 int count;
10046
10047 if (GET_CODE (operands[2]) == CONST_INT)
10048 {
10049 split_di (operands, 2, low, high);
10050 count = INTVAL (operands[2]) & 63;
10051
10052 if (count >= 32)
10053 {
10054 emit_move_insn (low[0], high[1]);
10055 emit_move_insn (high[0], const0_rtx);
10056
10057 if (count > 32)
10058 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10059 }
10060 else
10061 {
10062 if (!rtx_equal_p (operands[0], operands[1]))
10063 emit_move_insn (operands[0], operands[1]);
10064 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10065 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10066 }
10067 }
10068 else
10069 {
10070 if (!rtx_equal_p (operands[0], operands[1]))
10071 emit_move_insn (operands[0], operands[1]);
10072
10073 split_di (operands, 1, low, high);
10074
10075 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10076 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10077
10078 /* Heh. By reversing the arguments, we can reuse this pattern. */
10079 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10080 {
10081 if (! no_new_pseudos)
10082 scratch = force_reg (SImode, const0_rtx);
10083 else
10084 emit_move_insn (scratch, const0_rtx);
10085
10086 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10087 scratch));
10088 }
10089 else
10090 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10091 }
10092 }
10093
10094 /* Helper function for the string operations below. Test whether the bits
10095 of VARIABLE selected by VALUE are clear; if so, jump to the returned label. */
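/* For example, ix86_expand_aligntest (destreg, 1) branches to the returned
   label when the low bit of DESTREG is clear, so the single-byte copy the
   caller emits before that label only runs for odd destination addresses.  */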
10096 static rtx
10097 ix86_expand_aligntest (variable, value)
10098 rtx variable;
10099 int value;
10100 {
10101 rtx label = gen_label_rtx ();
10102 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10103 if (GET_MODE (variable) == DImode)
10104 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10105 else
10106 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10107 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10108 1, label);
10109 return label;
10110 }
10111
10112 /* Adjust COUNTER by the VALUE. */
10113 static void
10114 ix86_adjust_counter (countreg, value)
10115 rtx countreg;
10116 HOST_WIDE_INT value;
10117 {
10118 if (GET_MODE (countreg) == DImode)
10119 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10120 else
10121 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10122 }
10123
10124 /* Zero extend possibly SImode EXP to Pmode register. */
10125 rtx
10126 ix86_zero_extend_to_Pmode (exp)
10127 rtx exp;
10128 {
10129 rtx r;
10130 if (GET_MODE (exp) == VOIDmode)
10131 return force_reg (Pmode, exp);
10132 if (GET_MODE (exp) == Pmode)
10133 return copy_to_mode_reg (Pmode, exp);
10134 r = gen_reg_rtx (Pmode);
10135 emit_insn (gen_zero_extendsidi2 (r, exp));
10136 return r;
10137 }
10138
10139 /* Expand a string move (memcpy) operation. Use i386 string operations
10140 when profitable. ix86_expand_clrstr contains similar code. */
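/* Roughly, the strategy below is: for tiny or size-optimized copies emit a
   plain rep ; movsb; for a known count with sufficient alignment emit
   rep ; movsl (or movsq on 64-bit) plus a few trailing moves; otherwise
   align the destination first, use rep ; movsl for the bulk and finish the
   tail with individual moves, falling back to the library call when
   nothing is known about the alignment.  */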
10141 int
10142 ix86_expand_movstr (dst, src, count_exp, align_exp)
10143 rtx dst, src, count_exp, align_exp;
10144 {
10145 rtx srcreg, destreg, countreg;
10146 enum machine_mode counter_mode;
10147 HOST_WIDE_INT align = 0;
10148 unsigned HOST_WIDE_INT count = 0;
10149 rtx insns;
10150
10151 start_sequence ();
10152
10153 if (GET_CODE (align_exp) == CONST_INT)
10154 align = INTVAL (align_exp);
10155
10156 /* This simple hack avoids all of the alignment-adjustment code and simplifies the code below. */
10157 if (!TARGET_ALIGN_STRINGOPS)
10158 align = 64;
10159
10160 if (GET_CODE (count_exp) == CONST_INT)
10161 count = INTVAL (count_exp);
10162
10163 /* Figure out the proper mode for the counter. For 32-bit targets it is
10164 always SImode; for 64-bit targets use SImode when possible, otherwise
10165 DImode. Set count to the number of bytes copied when known at compile time. */
10166 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10167 || x86_64_zero_extended_value (count_exp))
10168 counter_mode = SImode;
10169 else
10170 counter_mode = DImode;
10171
10172 if (counter_mode != SImode && counter_mode != DImode)
10173 abort ();
10174
10175 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10176 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10177
10178 emit_insn (gen_cld ());
10179
10180 /* When not optimizing, or when optimizing for size, emit a simple
10181 rep ; movsb for counts that are unknown or not divisible by 4. */
10182
10183 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10184 {
10185 countreg = ix86_zero_extend_to_Pmode (count_exp);
10186 if (TARGET_64BIT)
10187 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10188 destreg, srcreg, countreg));
10189 else
10190 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10191 destreg, srcreg, countreg));
10192 }
10193
10194 /* For constant aligned (or small unaligned) copies use rep movsl
10195 followed by code copying the rest. For PentiumPro ensure 8 byte
10196 alignment to allow rep movsl acceleration. */
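/* For instance, with a known count of 23 bytes and 4-byte alignment on a
   32-bit target this path emits rep ; movsl with a count of 5 followed by
   one movsw and one movsb for the remaining three bytes.  */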
10197
10198 else if (count != 0
10199 && (align >= 8
10200 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10201 || optimize_size || count < (unsigned int) 64))
10202 {
10203 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10204 if (count & ~(size - 1))
10205 {
10206 countreg = copy_to_mode_reg (counter_mode,
10207 GEN_INT ((count >> (size == 4 ? 2 : 3))
10208 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10209 countreg = ix86_zero_extend_to_Pmode (countreg);
10210 if (size == 4)
10211 {
10212 if (TARGET_64BIT)
10213 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10214 destreg, srcreg, countreg));
10215 else
10216 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10217 destreg, srcreg, countreg));
10218 }
10219 else
10220 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10221 destreg, srcreg, countreg));
10222 }
10223 if (size == 8 && (count & 0x04))
10224 emit_insn (gen_strmovsi (destreg, srcreg));
10225 if (count & 0x02)
10226 emit_insn (gen_strmovhi (destreg, srcreg));
10227 if (count & 0x01)
10228 emit_insn (gen_strmovqi (destreg, srcreg));
10229 }
10230 /* The generic code based on the glibc implementation:
10231 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10232 allowing accelerated copying there)
10233 - copy the data using rep movsl
10234 - copy the rest. */
10235 else
10236 {
10237 rtx countreg2;
10238 rtx label = NULL;
10239 int desired_alignment = (TARGET_PENTIUMPRO
10240 && (count == 0 || count >= (unsigned int) 260)
10241 ? 8 : UNITS_PER_WORD);
10242
10243 /* In case we don't know anything about the alignment, default to the
10244 library version, since it is usually equally fast and results in
10245 shorter code. */
10246 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10247 {
10248 end_sequence ();
10249 return 0;
10250 }
10251
10252 if (TARGET_SINGLE_STRINGOP)
10253 emit_insn (gen_cld ());
10254
10255 countreg2 = gen_reg_rtx (Pmode);
10256 countreg = copy_to_mode_reg (counter_mode, count_exp);
10257
10258 /* We don't use loops to align the destination or to copy parts smaller
10259 than 4 bytes, because gcc is able to optimize such code better (in
10260 the case the destination or the count really is aligned, gcc is often
10261 able to predict the branches) and it is also friendlier to
10262 hardware branch prediction.
10263
10264 Using loops would be beneficial for the generic case, because we could
10265 handle small counts with the loops. Many CPUs (such as Athlon)
10266 have large REP prefix setup costs.
10267
10268 This is quite costly. Maybe we can revisit this decision later or
10269 add some customizability to this code. */
10270
10271 if (count == 0 && align < desired_alignment)
10272 {
10273 label = gen_label_rtx ();
10274 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10275 LEU, 0, counter_mode, 1, label);
10276 }
10277 if (align <= 1)
10278 {
10279 rtx label = ix86_expand_aligntest (destreg, 1);
10280 emit_insn (gen_strmovqi (destreg, srcreg));
10281 ix86_adjust_counter (countreg, 1);
10282 emit_label (label);
10283 LABEL_NUSES (label) = 1;
10284 }
10285 if (align <= 2)
10286 {
10287 rtx label = ix86_expand_aligntest (destreg, 2);
10288 emit_insn (gen_strmovhi (destreg, srcreg));
10289 ix86_adjust_counter (countreg, 2);
10290 emit_label (label);
10291 LABEL_NUSES (label) = 1;
10292 }
10293 if (align <= 4 && desired_alignment > 4)
10294 {
10295 rtx label = ix86_expand_aligntest (destreg, 4);
10296 emit_insn (gen_strmovsi (destreg, srcreg));
10297 ix86_adjust_counter (countreg, 4);
10298 emit_label (label);
10299 LABEL_NUSES (label) = 1;
10300 }
10301
10302 if (label && desired_alignment > 4 && !TARGET_64BIT)
10303 {
10304 emit_label (label);
10305 LABEL_NUSES (label) = 1;
10306 label = NULL_RTX;
10307 }
10308 if (!TARGET_SINGLE_STRINGOP)
10309 emit_insn (gen_cld ());
10310 if (TARGET_64BIT)
10311 {
10312 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10313 GEN_INT (3)));
10314 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10315 destreg, srcreg, countreg2));
10316 }
10317 else
10318 {
10319 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10320 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10321 destreg, srcreg, countreg2));
10322 }
10323
10324 if (label)
10325 {
10326 emit_label (label);
10327 LABEL_NUSES (label) = 1;
10328 }
10329 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10330 emit_insn (gen_strmovsi (destreg, srcreg));
10331 if ((align <= 4 || count == 0) && TARGET_64BIT)
10332 {
10333 rtx label = ix86_expand_aligntest (countreg, 4);
10334 emit_insn (gen_strmovsi (destreg, srcreg));
10335 emit_label (label);
10336 LABEL_NUSES (label) = 1;
10337 }
10338 if (align > 2 && count != 0 && (count & 2))
10339 emit_insn (gen_strmovhi (destreg, srcreg));
10340 if (align <= 2 || count == 0)
10341 {
10342 rtx label = ix86_expand_aligntest (countreg, 2);
10343 emit_insn (gen_strmovhi (destreg, srcreg));
10344 emit_label (label);
10345 LABEL_NUSES (label) = 1;
10346 }
10347 if (align > 1 && count != 0 && (count & 1))
10348 emit_insn (gen_strmovqi (destreg, srcreg));
10349 if (align <= 1 || count == 0)
10350 {
10351 rtx label = ix86_expand_aligntest (countreg, 1);
10352 emit_insn (gen_strmovqi (destreg, srcreg));
10353 emit_label (label);
10354 LABEL_NUSES (label) = 1;
10355 }
10356 }
10357
10358 insns = get_insns ();
10359 end_sequence ();
10360
10361 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10362 emit_insn (insns);
10363 return 1;
10364 }
10365
10366 /* Expand a string clear operation (bzero). Use i386 string operations
10367 when profitable. ix86_expand_movstr contains similar code. */
10368 int
10369 ix86_expand_clrstr (src, count_exp, align_exp)
10370 rtx src, count_exp, align_exp;
10371 {
10372 rtx destreg, zeroreg, countreg;
10373 enum machine_mode counter_mode;
10374 HOST_WIDE_INT align = 0;
10375 unsigned HOST_WIDE_INT count = 0;
10376
10377 if (GET_CODE (align_exp) == CONST_INT)
10378 align = INTVAL (align_exp);
10379
10380 /* This simple hack avoids all of the alignment-adjustment code and simplifies the code below. */
10381 if (!TARGET_ALIGN_STRINGOPS)
10382 align = 32;
10383
10384 if (GET_CODE (count_exp) == CONST_INT)
10385 count = INTVAL (count_exp);
10386 /* Figure out the proper mode for the counter. For 32-bit targets it is
10387 always SImode; for 64-bit targets use SImode when possible, otherwise
10388 DImode. Set count to the number of bytes cleared when known at compile time. */
10389 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10390 || x86_64_zero_extended_value (count_exp))
10391 counter_mode = SImode;
10392 else
10393 counter_mode = DImode;
10394
10395 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10396
10397 emit_insn (gen_cld ());
10398
10399 /* When not optimizing, or when optimizing for size, emit a simple
10400 rep ; stosb for counts that are unknown or not divisible by 4. */
10401
10402 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10403 {
10404 countreg = ix86_zero_extend_to_Pmode (count_exp);
10405 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10406 if (TARGET_64BIT)
10407 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10408 destreg, countreg));
10409 else
10410 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10411 destreg, countreg));
10412 }
10413 else if (count != 0
10414 && (align >= 8
10415 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10416 || optimize_size || count < (unsigned int) 64))
10417 {
10418 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10419 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10420 if (count & ~(size - 1))
10421 {
10422 countreg = copy_to_mode_reg (counter_mode,
10423 GEN_INT ((count >> (size == 4 ? 2 : 3))
10424 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10425 countreg = ix86_zero_extend_to_Pmode (countreg);
10426 if (size == 4)
10427 {
10428 if (TARGET_64BIT)
10429 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10430 destreg, countreg));
10431 else
10432 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10433 destreg, countreg));
10434 }
10435 else
10436 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10437 destreg, countreg));
10438 }
10439 if (size == 8 && (count & 0x04))
10440 emit_insn (gen_strsetsi (destreg,
10441 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10442 if (count & 0x02)
10443 emit_insn (gen_strsethi (destreg,
10444 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10445 if (count & 0x01)
10446 emit_insn (gen_strsetqi (destreg,
10447 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10448 }
10449 else
10450 {
10451 rtx countreg2;
10452 rtx label = NULL;
10453 /* Compute desired alignment of the string operation. */
10454 int desired_alignment = (TARGET_PENTIUMPRO
10455 && (count == 0 || count >= (unsigned int) 260)
10456 ? 8 : UNITS_PER_WORD);
10457
10458 /* In case we don't know anything about the alignment, default to the
10459 library version, since it is usually equally fast and results in
10460 shorter code. */
10461 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10462 return 0;
10463
10464 if (TARGET_SINGLE_STRINGOP)
10465 emit_insn (gen_cld ());
10466
10467 countreg2 = gen_reg_rtx (Pmode);
10468 countreg = copy_to_mode_reg (counter_mode, count_exp);
10469 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10470
10471 if (count == 0 && align < desired_alignment)
10472 {
10473 label = gen_label_rtx ();
10474 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10475 LEU, 0, counter_mode, 1, label);
10476 }
10477 if (align <= 1)
10478 {
10479 rtx label = ix86_expand_aligntest (destreg, 1);
10480 emit_insn (gen_strsetqi (destreg,
10481 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10482 ix86_adjust_counter (countreg, 1);
10483 emit_label (label);
10484 LABEL_NUSES (label) = 1;
10485 }
10486 if (align <= 2)
10487 {
10488 rtx label = ix86_expand_aligntest (destreg, 2);
10489 emit_insn (gen_strsethi (destreg,
10490 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10491 ix86_adjust_counter (countreg, 2);
10492 emit_label (label);
10493 LABEL_NUSES (label) = 1;
10494 }
10495 if (align <= 4 && desired_alignment > 4)
10496 {
10497 rtx label = ix86_expand_aligntest (destreg, 4);
10498 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10499 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10500 : zeroreg)));
10501 ix86_adjust_counter (countreg, 4);
10502 emit_label (label);
10503 LABEL_NUSES (label) = 1;
10504 }
10505
10506 if (label && desired_alignment > 4 && !TARGET_64BIT)
10507 {
10508 emit_label (label);
10509 LABEL_NUSES (label) = 1;
10510 label = NULL_RTX;
10511 }
10512
10513 if (!TARGET_SINGLE_STRINGOP)
10514 emit_insn (gen_cld ());
10515 if (TARGET_64BIT)
10516 {
10517 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10518 GEN_INT (3)));
10519 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10520 destreg, countreg2));
10521 }
10522 else
10523 {
10524 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10525 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10526 destreg, countreg2));
10527 }
10528 if (label)
10529 {
10530 emit_label (label);
10531 LABEL_NUSES (label) = 1;
10532 }
10533
10534 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10535 emit_insn (gen_strsetsi (destreg,
10536 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10537 if (TARGET_64BIT && (align <= 4 || count == 0))
10538 {
10539 rtx label = ix86_expand_aligntest (countreg, 4);
10540 emit_insn (gen_strsetsi (destreg,
10541 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10542 emit_label (label);
10543 LABEL_NUSES (label) = 1;
10544 }
10545 if (align > 2 && count != 0 && (count & 2))
10546 emit_insn (gen_strsethi (destreg,
10547 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10548 if (align <= 2 || count == 0)
10549 {
10550 rtx label = ix86_expand_aligntest (countreg, 2);
10551 emit_insn (gen_strsethi (destreg,
10552 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10553 emit_label (label);
10554 LABEL_NUSES (label) = 1;
10555 }
10556 if (align > 1 && count != 0 && (count & 1))
10557 emit_insn (gen_strsetqi (destreg,
10558 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10559 if (align <= 1 || count == 0)
10560 {
10561 rtx label = ix86_expand_aligntest (countreg, 1);
10562 emit_insn (gen_strsetqi (destreg,
10563 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10564 emit_label (label);
10565 LABEL_NUSES (label) = 1;
10566 }
10567 }
10568 return 1;
10569 }
10570 /* Expand strlen. */
10571 int
10572 ix86_expand_strlen (out, src, eoschar, align)
10573 rtx out, src, eoschar, align;
10574 {
10575 rtx addr, scratch1, scratch2, scratch3, scratch4;
10576
10577 /* The generic case of the strlen expander is long. Avoid expanding it
10578 unless TARGET_INLINE_ALL_STRINGOPS. */
10579
10580 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10581 && !TARGET_INLINE_ALL_STRINGOPS
10582 && !optimize_size
10583 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10584 return 0;
10585
10586 addr = force_reg (Pmode, XEXP (src, 0));
10587 scratch1 = gen_reg_rtx (Pmode);
10588
10589 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10590 && !optimize_size)
10591 {
10592 /* Well it seems that some optimizer does not combine a call like
10593 foo(strlen(bar), strlen(bar));
10594 when the move and the subtraction are done here. It does calculate
10595 the length just once when these instructions are done inside of
10596 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10597 often used and I use one fewer register for the lifetime of
10598 output_strlen_unroll() this is better. */
10599
10600 emit_move_insn (out, addr);
10601
10602 ix86_expand_strlensi_unroll_1 (out, align);
10603
10604 /* strlensi_unroll_1 returns the address of the zero at the end of
10605 the string, like memchr(), so compute the length by subtracting
10606 the start address. */
10607 if (TARGET_64BIT)
10608 emit_insn (gen_subdi3 (out, out, addr));
10609 else
10610 emit_insn (gen_subsi3 (out, out, addr));
10611 }
10612 else
10613 {
10614 scratch2 = gen_reg_rtx (Pmode);
10615 scratch3 = gen_reg_rtx (Pmode);
10616 scratch4 = force_reg (Pmode, constm1_rtx);
10617
10618 emit_move_insn (scratch3, addr);
10619 eoschar = force_reg (QImode, eoschar);
10620
10621 emit_insn (gen_cld ());
10622 if (TARGET_64BIT)
10623 {
10624 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10625 align, scratch4, scratch3));
10626 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10627 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10628 }
10629 else
10630 {
10631 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10632 align, scratch4, scratch3));
10633 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10634 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10635 }
10636 }
10637 return 1;
10638 }
10639
10640 /* Expand the appropriate insns for doing strlen if not just doing
10641 repnz; scasb
10642
10643 out = result, initialized with the start address
10644 align_rtx = alignment of the address.
10645 scratch = scratch register, initialized with the start address when
10646 not aligned, otherwise undefined
10647
10648 This is just the body. It needs the initialisations mentioned above and
10649 some address computing at the end. These things are done in i386.md. */
10650
10651 static void
10652 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10653 rtx out, align_rtx;
10654 {
10655 int align;
10656 rtx tmp;
10657 rtx align_2_label = NULL_RTX;
10658 rtx align_3_label = NULL_RTX;
10659 rtx align_4_label = gen_label_rtx ();
10660 rtx end_0_label = gen_label_rtx ();
10661 rtx mem;
10662 rtx tmpreg = gen_reg_rtx (SImode);
10663 rtx scratch = gen_reg_rtx (SImode);
10664
10665 align = 0;
10666 if (GET_CODE (align_rtx) == CONST_INT)
10667 align = INTVAL (align_rtx);
10668
10669 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10670
10671 /* Is there a known alignment and is it less than 4? */
10672 if (align < 4)
10673 {
10674 rtx scratch1 = gen_reg_rtx (Pmode);
10675 emit_move_insn (scratch1, out);
10676 /* Is there a known alignment and is it not 2? */
10677 if (align != 2)
10678 {
10679 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10680 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10681
10682 /* Leave just the 3 lower bits. */
10683 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10684 NULL_RTX, 0, OPTAB_WIDEN);
10685
10686 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10687 Pmode, 1, align_4_label);
10688 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10689 Pmode, 1, align_2_label);
10690 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10691 Pmode, 1, align_3_label);
10692 }
10693 else
10694 {
10695 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10696 check whether it is aligned to a 4-byte boundary. */
10697
10698 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10699 NULL_RTX, 0, OPTAB_WIDEN);
10700
10701 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10702 Pmode, 1, align_4_label);
10703 }
10704
10705 mem = gen_rtx_MEM (QImode, out);
10706
10707 /* Now compare the bytes. */
10708
10709 /* Compare the first 1..3 unaligned bytes on a byte-by-byte basis. */
10710 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10711 QImode, 1, end_0_label);
10712
10713 /* Increment the address. */
10714 if (TARGET_64BIT)
10715 emit_insn (gen_adddi3 (out, out, const1_rtx));
10716 else
10717 emit_insn (gen_addsi3 (out, out, const1_rtx));
10718
10719 /* Not needed with an alignment of 2 */
10720 if (align != 2)
10721 {
10722 emit_label (align_2_label);
10723
10724 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10725 end_0_label);
10726
10727 if (TARGET_64BIT)
10728 emit_insn (gen_adddi3 (out, out, const1_rtx));
10729 else
10730 emit_insn (gen_addsi3 (out, out, const1_rtx));
10731
10732 emit_label (align_3_label);
10733 }
10734
10735 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10736 end_0_label);
10737
10738 if (TARGET_64BIT)
10739 emit_insn (gen_adddi3 (out, out, const1_rtx));
10740 else
10741 emit_insn (gen_addsi3 (out, out, const1_rtx));
10742 }
10743
10744 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10745 align this loop; doing so only makes programs larger and does not
10746 speed them up. */
10747 emit_label (align_4_label);
10748
10749 mem = gen_rtx_MEM (SImode, out);
10750 emit_move_insn (scratch, mem);
10751 if (TARGET_64BIT)
10752 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10753 else
10754 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10755
10756 /* This formula yields a nonzero result iff one of the bytes is zero.
10757 This saves three branches inside the loop and many cycles. */
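/* Concretely, for the loaded word x this computes
   (x - 0x01010101) & ~x & 0x80808080.  The subtraction leaves bit 7 of a
   byte set when that byte was zero (it wraps to 0xff) or was >= 0x81; the
   AND with ~x discards the latter, so a bit survives exactly when some
   byte of x is zero.  Inter-byte borrows only arise after a lower zero
   byte has already produced a surviving bit, so they do not matter.  */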
10758
10759 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10760 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10761 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10762 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10763 gen_int_mode (0x80808080, SImode)));
10764 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10765 align_4_label);
10766
10767 if (TARGET_CMOVE)
10768 {
10769 rtx reg = gen_reg_rtx (SImode);
10770 rtx reg2 = gen_reg_rtx (Pmode);
10771 emit_move_insn (reg, tmpreg);
10772 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10773
10774 /* If zero is not in the first two bytes, move two bytes forward. */
10775 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10776 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10777 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10778 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10779 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10780 reg,
10781 tmpreg)));
10782 /* Emit lea manually to avoid clobbering of flags. */
10783 emit_insn (gen_rtx_SET (SImode, reg2,
10784 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10785
10786 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10787 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10788 emit_insn (gen_rtx_SET (VOIDmode, out,
10789 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10790 reg2,
10791 out)));
10792
10793 }
10794 else
10795 {
10796 rtx end_2_label = gen_label_rtx ();
10797 /* Is zero in the first two bytes? */
10798
10799 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10800 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10801 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10802 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10803 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10804 pc_rtx);
10805 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10806 JUMP_LABEL (tmp) = end_2_label;
10807
10808 /* Not in the first two. Move two bytes forward. */
10809 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10810 if (TARGET_64BIT)
10811 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10812 else
10813 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10814
10815 emit_label (end_2_label);
10816
10817 }
10818
10819 /* Avoid branch in fixing the byte. */
10820 tmpreg = gen_lowpart (QImode, tmpreg);
10821 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10822 if (TARGET_64BIT)
10823 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10824 else
10825 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10826
10827 emit_label (end_0_label);
10828 }
10829
10830 void
10831 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10832 rtx retval, fnaddr, callarg1, callarg2, pop;
10833 {
10834 rtx use = NULL, call;
10835
10836 if (pop == const0_rtx)
10837 pop = NULL;
10838 if (TARGET_64BIT && pop)
10839 abort ();
10840
10841 #if TARGET_MACHO
10842 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10843 fnaddr = machopic_indirect_call_target (fnaddr);
10844 #else
10845 /* Static functions and indirect calls don't need the pic register. */
10846 if (! TARGET_64BIT && flag_pic
10847 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10848 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10849 use_reg (&use, pic_offset_table_rtx);
10850
10851 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10852 {
10853 rtx al = gen_rtx_REG (QImode, 0);
10854 emit_move_insn (al, callarg2);
10855 use_reg (&use, al);
10856 }
10857 #endif /* TARGET_MACHO */
10858
10859 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10860 {
10861 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10862 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10863 }
10864
10865 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10866 if (retval)
10867 call = gen_rtx_SET (VOIDmode, retval, call);
10868 if (pop)
10869 {
10870 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10871 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10872 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10873 }
10874
10875 call = emit_call_insn (call);
10876 if (use)
10877 CALL_INSN_FUNCTION_USAGE (call) = use;
10878 }
10879
10880 \f
10881 /* Clear stack slot assignments remembered from previous functions.
10882 This is called from INIT_EXPANDERS once before RTL is emitted for each
10883 function. */
10884
10885 static struct machine_function *
10886 ix86_init_machine_status ()
10887 {
10888 return ggc_alloc_cleared (sizeof (struct machine_function));
10889 }
10890
10891 /* Return a MEM corresponding to a stack slot with mode MODE.
10892 Allocate a new slot if necessary.
10893
10894 The RTL for a function can have several slots available: N is
10895 which slot to use. */
10896
10897 rtx
10898 assign_386_stack_local (mode, n)
10899 enum machine_mode mode;
10900 int n;
10901 {
10902 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10903 abort ();
10904
10905 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10906 ix86_stack_locals[(int) mode][n]
10907 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10908
10909 return ix86_stack_locals[(int) mode][n];
10910 }
10911
10912 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10913
10914 static GTY(()) rtx ix86_tls_symbol;
10915 rtx
10916 ix86_tls_get_addr ()
10917 {
10918
10919 if (!ix86_tls_symbol)
10920 {
10921 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
10922 (TARGET_GNU_TLS && !TARGET_64BIT)
10923 ? "___tls_get_addr"
10924 : "__tls_get_addr");
10925 }
10926
10927 return ix86_tls_symbol;
10928 }
10929 \f
10930 /* Calculate the length of the memory address in the instruction
10931 encoding. Does not include the one-byte modrm, opcode, or prefix. */
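/* For example, an address of the form base plus an 8-bit displacement such
   as 4(%ebx) costs one extra byte, a 32-bit absolute address costs four,
   and adding an index register costs one more byte for the SIB byte.  */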
10932
10933 static int
10934 memory_address_length (addr)
10935 rtx addr;
10936 {
10937 struct ix86_address parts;
10938 rtx base, index, disp;
10939 int len;
10940
10941 if (GET_CODE (addr) == PRE_DEC
10942 || GET_CODE (addr) == POST_INC
10943 || GET_CODE (addr) == PRE_MODIFY
10944 || GET_CODE (addr) == POST_MODIFY)
10945 return 0;
10946
10947 if (! ix86_decompose_address (addr, &parts))
10948 abort ();
10949
10950 base = parts.base;
10951 index = parts.index;
10952 disp = parts.disp;
10953 len = 0;
10954
10955 /* Register Indirect. */
10956 if (base && !index && !disp)
10957 {
10958 /* Special cases: ebp and esp need the two-byte modrm form. */
10959 if (addr == stack_pointer_rtx
10960 || addr == arg_pointer_rtx
10961 || addr == frame_pointer_rtx
10962 || addr == hard_frame_pointer_rtx)
10963 len = 1;
10964 }
10965
10966 /* Direct Addressing. */
10967 else if (disp && !base && !index)
10968 len = 4;
10969
10970 else
10971 {
10972 /* Find the length of the displacement constant. */
10973 if (disp)
10974 {
10975 if (GET_CODE (disp) == CONST_INT
10976 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10977 len = 1;
10978 else
10979 len = 4;
10980 }
10981
10982 /* An index requires the two-byte modrm form. */
10983 if (index)
10984 len += 1;
10985 }
10986
10987 return len;
10988 }
10989
10990 /* Compute the default value for the "length_immediate" attribute. When
10991 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
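/* For example, with SHORTFORM set a constant operand of 5 in an SImode insn
   counts as one byte (the sign-extended imm8 form), while a constant of
   100000 counts as four.  */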
10992 int
10993 ix86_attr_length_immediate_default (insn, shortform)
10994 rtx insn;
10995 int shortform;
10996 {
10997 int len = 0;
10998 int i;
10999 extract_insn_cached (insn);
11000 for (i = recog_data.n_operands - 1; i >= 0; --i)
11001 if (CONSTANT_P (recog_data.operand[i]))
11002 {
11003 if (len)
11004 abort ();
11005 if (shortform
11006 && GET_CODE (recog_data.operand[i]) == CONST_INT
11007 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11008 len = 1;
11009 else
11010 {
11011 switch (get_attr_mode (insn))
11012 {
11013 case MODE_QI:
11014 len+=1;
11015 break;
11016 case MODE_HI:
11017 len+=2;
11018 break;
11019 case MODE_SI:
11020 len+=4;
11021 break;
11022 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11023 case MODE_DI:
11024 len+=4;
11025 break;
11026 default:
11027 fatal_insn ("unknown insn mode", insn);
11028 }
11029 }
11030 }
11031 return len;
11032 }
11033 /* Compute default value for "length_address" attribute. */
11034 int
11035 ix86_attr_length_address_default (insn)
11036 rtx insn;
11037 {
11038 int i;
11039 extract_insn_cached (insn);
11040 for (i = recog_data.n_operands - 1; i >= 0; --i)
11041 if (GET_CODE (recog_data.operand[i]) == MEM)
11042 {
11043 return memory_address_length (XEXP (recog_data.operand[i], 0));
11045 }
11046 return 0;
11047 }
11048 \f
11049 /* Return the maximum number of instructions a cpu can issue. */
11050
11051 static int
11052 ix86_issue_rate ()
11053 {
11054 switch (ix86_cpu)
11055 {
11056 case PROCESSOR_PENTIUM:
11057 case PROCESSOR_K6:
11058 return 2;
11059
11060 case PROCESSOR_PENTIUMPRO:
11061 case PROCESSOR_PENTIUM4:
11062 case PROCESSOR_ATHLON:
11063 return 3;
11064
11065 default:
11066 return 1;
11067 }
11068 }
11069
11070 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11071 set by DEP_INSN and reads nothing else that DEP_INSN sets. */
11072
11073 static int
11074 ix86_flags_dependant (insn, dep_insn, insn_type)
11075 rtx insn, dep_insn;
11076 enum attr_type insn_type;
11077 {
11078 rtx set, set2;
11079
11080 /* Simplify the test for uninteresting insns. */
11081 if (insn_type != TYPE_SETCC
11082 && insn_type != TYPE_ICMOV
11083 && insn_type != TYPE_FCMOV
11084 && insn_type != TYPE_IBR)
11085 return 0;
11086
11087 if ((set = single_set (dep_insn)) != 0)
11088 {
11089 set = SET_DEST (set);
11090 set2 = NULL_RTX;
11091 }
11092 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11093 && XVECLEN (PATTERN (dep_insn), 0) == 2
11094 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11095 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11096 {
11097 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11098 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11099 }
11100 else
11101 return 0;
11102
11103 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11104 return 0;
11105
11106 /* This test is true if the dependent insn reads the flags but
11107 not any other potentially set register. */
11108 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11109 return 0;
11110
11111 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11112 return 0;
11113
11114 return 1;
11115 }
11116
11117 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11118 address with operands set by DEP_INSN. */
11119
11120 static int
11121 ix86_agi_dependant (insn, dep_insn, insn_type)
11122 rtx insn, dep_insn;
11123 enum attr_type insn_type;
11124 {
11125 rtx addr;
11126
11127 if (insn_type == TYPE_LEA
11128 && TARGET_PENTIUM)
11129 {
11130 addr = PATTERN (insn);
11131 if (GET_CODE (addr) == SET)
11132 ;
11133 else if (GET_CODE (addr) == PARALLEL
11134 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11135 addr = XVECEXP (addr, 0, 0);
11136 else
11137 abort ();
11138 addr = SET_SRC (addr);
11139 }
11140 else
11141 {
11142 int i;
11143 extract_insn_cached (insn);
11144 for (i = recog_data.n_operands - 1; i >= 0; --i)
11145 if (GET_CODE (recog_data.operand[i]) == MEM)
11146 {
11147 addr = XEXP (recog_data.operand[i], 0);
11148 goto found;
11149 }
11150 return 0;
11151 found:;
11152 }
11153
11154 return modified_in_p (addr, dep_insn);
11155 }
11156
11157 static int
11158 ix86_adjust_cost (insn, link, dep_insn, cost)
11159 rtx insn, link, dep_insn;
11160 int cost;
11161 {
11162 enum attr_type insn_type, dep_insn_type;
11163 enum attr_memory memory, dep_memory;
11164 rtx set, set2;
11165 int dep_insn_code_number;
11166
11167 /* Anti and output dependencies have zero cost on all CPUs. */
11168 if (REG_NOTE_KIND (link) != 0)
11169 return 0;
11170
11171 dep_insn_code_number = recog_memoized (dep_insn);
11172
11173 /* If we can't recognize the insns, we can't really do anything. */
11174 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11175 return cost;
11176
11177 insn_type = get_attr_type (insn);
11178 dep_insn_type = get_attr_type (dep_insn);
11179
11180 switch (ix86_cpu)
11181 {
11182 case PROCESSOR_PENTIUM:
11183 /* Address Generation Interlock adds a cycle of latency. */
11184 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11185 cost += 1;
11186
11187 /* ??? Compares pair with jump/setcc. */
11188 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11189 cost = 0;
11190
11191 /* Floating point stores require the value to be ready one cycle earlier. */
11192 if (insn_type == TYPE_FMOV
11193 && get_attr_memory (insn) == MEMORY_STORE
11194 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11195 cost += 1;
11196 break;
11197
11198 case PROCESSOR_PENTIUMPRO:
11199 memory = get_attr_memory (insn);
11200 dep_memory = get_attr_memory (dep_insn);
11201
11202 /* Since we can't represent delayed latencies of load+operation,
11203 increase the cost here for non-imov insns. */
11204 if (dep_insn_type != TYPE_IMOV
11205 && dep_insn_type != TYPE_FMOV
11206 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11207 cost += 1;
11208
11209 /* INT->FP conversion is expensive. */
11210 if (get_attr_fp_int_src (dep_insn))
11211 cost += 5;
11212
11213 /* There is one cycle extra latency between an FP op and a store. */
11214 if (insn_type == TYPE_FMOV
11215 && (set = single_set (dep_insn)) != NULL_RTX
11216 && (set2 = single_set (insn)) != NULL_RTX
11217 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11218 && GET_CODE (SET_DEST (set2)) == MEM)
11219 cost += 1;
11220
11221 /* Show the ability of the reorder buffer to hide the latency of a load
11222 by executing it in parallel with the previous instruction when that
11223 instruction is not needed to compute the address. */
11224 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11225 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11226 {
11227 /* Claim that moves take one cycle, as the core can issue one load
11228 at a time and the next load can start a cycle later. */
11229 if (dep_insn_type == TYPE_IMOV
11230 || dep_insn_type == TYPE_FMOV)
11231 cost = 1;
11232 else if (cost > 1)
11233 cost--;
11234 }
11235 break;
11236
11237 case PROCESSOR_K6:
11238 memory = get_attr_memory (insn);
11239 dep_memory = get_attr_memory (dep_insn);
11240 /* The esp dependency is resolved before the instruction is really
11241 finished. */
11242 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11243 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11244 return 1;
11245
11246 /* Since we can't represent delayed latencies of load+operation,
11247 increase the cost here for non-imov insns. */
11248 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11249 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11250
11251 /* INT->FP conversion is expensive. */
11252 if (get_attr_fp_int_src (dep_insn))
11253 cost += 5;
11254
11255 /* Show the ability of the reorder buffer to hide the latency of a load
11256 by executing it in parallel with the previous instruction when that
11257 instruction is not needed to compute the address. */
11258 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11259 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11260 {
11261 /* Claim that moves take one cycle, as the core can issue one load
11262 at a time and the next load can start a cycle later. */
11263 if (dep_insn_type == TYPE_IMOV
11264 || dep_insn_type == TYPE_FMOV)
11265 cost = 1;
11266 else if (cost > 2)
11267 cost -= 2;
11268 else
11269 cost = 1;
11270 }
11271 break;
11272
11273 case PROCESSOR_ATHLON:
11274 memory = get_attr_memory (insn);
11275 dep_memory = get_attr_memory (dep_insn);
11276
11277 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11278 {
11279 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11280 cost += 2;
11281 else
11282 cost += 3;
11283 }
11284 /* Show the ability of the reorder buffer to hide the latency of a load
11285 by executing it in parallel with the previous instruction when that
11286 instruction is not needed to compute the address. */
11287 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11288 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11289 {
11290 /* Claim that moves take one cycle, as the core can issue one load
11291 at a time and the next load can start a cycle later. */
11292 if (dep_insn_type == TYPE_IMOV
11293 || dep_insn_type == TYPE_FMOV)
11294 cost = 0;
11295 else if (cost >= 3)
11296 cost -= 3;
11297 else
11298 cost = 0;
11299 }
11300
11301 default:
11302 break;
11303 }
11304
11305 return cost;
11306 }
11307
11308 static union
11309 {
11310 struct ppro_sched_data
11311 {
11312 rtx decode[3];
11313 int issued_this_cycle;
11314 } ppro;
11315 } ix86_sched_data;
11316
11317 static enum attr_ppro_uops
11318 ix86_safe_ppro_uops (insn)
11319 rtx insn;
11320 {
11321 if (recog_memoized (insn) >= 0)
11322 return get_attr_ppro_uops (insn);
11323 else
11324 return PPRO_UOPS_MANY;
11325 }
11326
11327 static void
11328 ix86_dump_ppro_packet (dump)
11329 FILE *dump;
11330 {
11331 if (ix86_sched_data.ppro.decode[0])
11332 {
11333 fprintf (dump, "PPRO packet: %d",
11334 INSN_UID (ix86_sched_data.ppro.decode[0]));
11335 if (ix86_sched_data.ppro.decode[1])
11336 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11337 if (ix86_sched_data.ppro.decode[2])
11338 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11339 fputc ('\n', dump);
11340 }
11341 }
11342
11343 /* We're beginning a new block. Initialize data structures as necessary. */
11344
11345 static void
11346 ix86_sched_init (dump, sched_verbose, veclen)
11347 FILE *dump ATTRIBUTE_UNUSED;
11348 int sched_verbose ATTRIBUTE_UNUSED;
11349 int veclen ATTRIBUTE_UNUSED;
11350 {
11351 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11352 }
11353
11354 /* Shift INSN to SLOT, and shift everything else down. */
11355
11356 static void
11357 ix86_reorder_insn (insnp, slot)
11358 rtx *insnp, *slot;
11359 {
11360 if (insnp != slot)
11361 {
11362 rtx insn = *insnp;
11363 do
11364 insnp[0] = insnp[1];
11365 while (++insnp != slot);
11366 *insnp = insn;
11367 }
11368 }
11369
11370 static void
11371 ix86_sched_reorder_ppro (ready, e_ready)
11372 rtx *ready;
11373 rtx *e_ready;
11374 {
11375 rtx decode[3];
11376 enum attr_ppro_uops cur_uops;
11377 int issued_this_cycle;
11378 rtx *insnp;
11379 int i;
11380
11381 /* At this point .ppro.decode contains the state of the three
11382 decoders from last "cycle". That is, those insns that were
11383 actually independent. But here we're scheduling for the
11384 decoder, and we may find things that are decodable in the
11385 same cycle. */
11386
11387 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11388 issued_this_cycle = 0;
11389
11390 insnp = e_ready;
11391 cur_uops = ix86_safe_ppro_uops (*insnp);
11392
11393 /* If the decoders are empty, and we've a complex insn at the
11394 head of the priority queue, let it issue without complaint. */
11395 if (decode[0] == NULL)
11396 {
11397 if (cur_uops == PPRO_UOPS_MANY)
11398 {
11399 decode[0] = *insnp;
11400 goto ppro_done;
11401 }
11402
11403 /* Otherwise, search for a 2-4 uop insn to issue. */
11404 while (cur_uops != PPRO_UOPS_FEW)
11405 {
11406 if (insnp == ready)
11407 break;
11408 cur_uops = ix86_safe_ppro_uops (*--insnp);
11409 }
11410
11411 /* If so, move it to the head of the line. */
11412 if (cur_uops == PPRO_UOPS_FEW)
11413 ix86_reorder_insn (insnp, e_ready);
11414
11415 /* Issue the head of the queue. */
11416 issued_this_cycle = 1;
11417 decode[0] = *e_ready--;
11418 }
11419
11420 /* Look for simple insns to fill in the other two slots. */
11421 for (i = 1; i < 3; ++i)
11422 if (decode[i] == NULL)
11423 {
11424 if (ready > e_ready)
11425 goto ppro_done;
11426
11427 insnp = e_ready;
11428 cur_uops = ix86_safe_ppro_uops (*insnp);
11429 while (cur_uops != PPRO_UOPS_ONE)
11430 {
11431 if (insnp == ready)
11432 break;
11433 cur_uops = ix86_safe_ppro_uops (*--insnp);
11434 }
11435
11436 /* Found one. Move it to the head of the queue and issue it. */
11437 if (cur_uops == PPRO_UOPS_ONE)
11438 {
11439 ix86_reorder_insn (insnp, e_ready);
11440 decode[i] = *e_ready--;
11441 issued_this_cycle++;
11442 continue;
11443 }
11444
11445 /* ??? Didn't find one. Ideally, here we would do a lazy split
11446 of 2-uop insns, issue one and queue the other. */
11447 }
11448
11449 ppro_done:
11450 if (issued_this_cycle == 0)
11451 issued_this_cycle = 1;
11452 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11453 }
11454
11455 /* We are about to begin issuing insns for this clock cycle.
11456 Override the default sort algorithm to better slot instructions. */
11457 static int
11458 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11459 FILE *dump ATTRIBUTE_UNUSED;
11460 int sched_verbose ATTRIBUTE_UNUSED;
11461 rtx *ready;
11462 int *n_readyp;
11463 int clock_var ATTRIBUTE_UNUSED;
11464 {
11465 int n_ready = *n_readyp;
11466 rtx *e_ready = ready + n_ready - 1;
11467
11468 /* Make sure to go ahead and initialize key items in
11469 ix86_sched_data if we are not going to bother trying to
11470 reorder the ready queue. */
11471 if (n_ready < 2)
11472 {
11473 ix86_sched_data.ppro.issued_this_cycle = 1;
11474 goto out;
11475 }
11476
11477 switch (ix86_cpu)
11478 {
11479 default:
11480 break;
11481
11482 case PROCESSOR_PENTIUMPRO:
11483 ix86_sched_reorder_ppro (ready, e_ready);
11484 break;
11485 }
11486
11487 out:
11488 return ix86_issue_rate ();
11489 }
11490
11491 /* We are about to issue INSN. Return the number of insns left on the
11492 ready queue that can be issued this cycle. */
11493
11494 static int
11495 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11496 FILE *dump;
11497 int sched_verbose;
11498 rtx insn;
11499 int can_issue_more;
11500 {
11501 int i;
11502 switch (ix86_cpu)
11503 {
11504 default:
11505 return can_issue_more - 1;
11506
11507 case PROCESSOR_PENTIUMPRO:
11508 {
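      /* The PPro front end has one complex decoder that can handle insns
         of up to four uops and two simple decoders that take one uop each
         (the familiar 4-1-1 decode template); decode[0..2] tracks which of
         those slots have been filled this cycle.  */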
11509 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11510
11511 if (uops == PPRO_UOPS_MANY)
11512 {
11513 if (sched_verbose)
11514 ix86_dump_ppro_packet (dump);
11515 ix86_sched_data.ppro.decode[0] = insn;
11516 ix86_sched_data.ppro.decode[1] = NULL;
11517 ix86_sched_data.ppro.decode[2] = NULL;
11518 if (sched_verbose)
11519 ix86_dump_ppro_packet (dump);
11520 ix86_sched_data.ppro.decode[0] = NULL;
11521 }
11522 else if (uops == PPRO_UOPS_FEW)
11523 {
11524 if (sched_verbose)
11525 ix86_dump_ppro_packet (dump);
11526 ix86_sched_data.ppro.decode[0] = insn;
11527 ix86_sched_data.ppro.decode[1] = NULL;
11528 ix86_sched_data.ppro.decode[2] = NULL;
11529 }
11530 else
11531 {
11532 for (i = 0; i < 3; ++i)
11533 if (ix86_sched_data.ppro.decode[i] == NULL)
11534 {
11535 ix86_sched_data.ppro.decode[i] = insn;
11536 break;
11537 }
11538 if (i == 3)
11539 abort ();
11540 if (i == 2)
11541 {
11542 if (sched_verbose)
11543 ix86_dump_ppro_packet (dump);
11544 ix86_sched_data.ppro.decode[0] = NULL;
11545 ix86_sched_data.ppro.decode[1] = NULL;
11546 ix86_sched_data.ppro.decode[2] = NULL;
11547 }
11548 }
11549 }
11550 return --ix86_sched_data.ppro.issued_this_cycle;
11551 }
11552 }
11553
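/* Return nonzero if the DFA based pipeline hazard recognizer should be
   used instead of the callback based interface; at present it is enabled
   only when scheduling for the Pentium.  */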
11554 static int
11555 ia32_use_dfa_pipeline_interface ()
11556 {
11557 if (ix86_cpu == PROCESSOR_PENTIUM)
11558 return 1;
11559 return 0;
11560 }
11561
11562 /* How many alternative schedules to try. This should be as wide as the
11563 scheduling freedom in the DFA, but no wider. Making this value too
11564    large results in extra work for the scheduler.  */
11565
11566 static int
11567 ia32_multipass_dfa_lookahead ()
11568 {
11569 if (ix86_cpu == PROCESSOR_PENTIUM)
11570 return 2;
11571 else
11572 return 0;
11573 }
11574
11575 \f
11576 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11577 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11578 appropriate. */
11579
11580 void
11581 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11582 rtx insns;
11583 rtx dstref, srcref, dstreg, srcreg;
11584 {
11585 rtx insn;
11586
11587 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11588 if (INSN_P (insn))
11589 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11590 dstreg, srcreg);
11591 }
11592
11593 /* Subroutine of above to actually do the updating by recursively walking
11594 the rtx. */
11595
11596 static void
11597 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11598 rtx x;
11599 rtx dstref, srcref, dstreg, srcreg;
11600 {
11601 enum rtx_code code = GET_CODE (x);
11602 const char *format_ptr = GET_RTX_FORMAT (code);
11603 int i, j;
11604
11605 if (code == MEM && XEXP (x, 0) == dstreg)
11606 MEM_COPY_ATTRIBUTES (x, dstref);
11607 else if (code == MEM && XEXP (x, 0) == srcreg)
11608 MEM_COPY_ATTRIBUTES (x, srcref);
11609
11610 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11611 {
11612 if (*format_ptr == 'e')
11613 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11614 dstreg, srcreg);
11615 else if (*format_ptr == 'E')
11616 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11617 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11618 dstreg, srcreg);
11619 }
11620 }
11621 \f
11622 /* Compute the alignment given to a constant that is being placed in memory.
11623 EXP is the constant and ALIGN is the alignment that the object would
11624 ordinarily have.
11625 The value of this function is used instead of that alignment to align
11626 the object. */
11627
11628 int
11629 ix86_constant_alignment (exp, align)
11630 tree exp;
11631 int align;
11632 {
11633 if (TREE_CODE (exp) == REAL_CST)
11634 {
11635 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11636 return 64;
11637 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11638 return 128;
11639 }
11640 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11641 && align < 256)
11642 return 256;
11643
11644 return align;
11645 }
11646
11647 /* Compute the alignment for a static variable.
11648 TYPE is the data type, and ALIGN is the alignment that
11649 the object would ordinarily have. The value of this function is used
11650 instead of that alignment to align the object. */
11651
11652 int
11653 ix86_data_alignment (type, align)
11654 tree type;
11655 int align;
11656 {
11657 if (AGGREGATE_TYPE_P (type)
11658 && TYPE_SIZE (type)
11659 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11660 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11661 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11662 return 256;
11663
11664   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11665      to a 16-byte boundary.  */
11666 if (TARGET_64BIT)
11667 {
11668 if (AGGREGATE_TYPE_P (type)
11669 && TYPE_SIZE (type)
11670 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11671 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11672 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11673 return 128;
11674 }
11675
11676 if (TREE_CODE (type) == ARRAY_TYPE)
11677 {
11678 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11679 return 64;
11680 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11681 return 128;
11682 }
11683 else if (TREE_CODE (type) == COMPLEX_TYPE)
11684 {
11685
11686 if (TYPE_MODE (type) == DCmode && align < 64)
11687 return 64;
11688 if (TYPE_MODE (type) == XCmode && align < 128)
11689 return 128;
11690 }
11691 else if ((TREE_CODE (type) == RECORD_TYPE
11692 || TREE_CODE (type) == UNION_TYPE
11693 || TREE_CODE (type) == QUAL_UNION_TYPE)
11694 && TYPE_FIELDS (type))
11695 {
11696 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11697 return 64;
11698 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11699 return 128;
11700 }
11701 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11702 || TREE_CODE (type) == INTEGER_TYPE)
11703 {
11704 if (TYPE_MODE (type) == DFmode && align < 64)
11705 return 64;
11706 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11707 return 128;
11708 }
11709
11710 return align;
11711 }
11712
11713 /* Compute the alignment for a local variable.
11714 TYPE is the data type, and ALIGN is the alignment that
11715    the object would ordinarily have. The value of this function is used
11716 instead of that alignment to align the object. */
11717
11718 int
11719 ix86_local_alignment (type, align)
11720 tree type;
11721 int align;
11722 {
11723   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11724      to a 16-byte boundary.  */
11725 if (TARGET_64BIT)
11726 {
11727 if (AGGREGATE_TYPE_P (type)
11728 && TYPE_SIZE (type)
11729 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11730 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11731 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11732 return 128;
11733 }
11734 if (TREE_CODE (type) == ARRAY_TYPE)
11735 {
11736 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11737 return 64;
11738 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11739 return 128;
11740 }
11741 else if (TREE_CODE (type) == COMPLEX_TYPE)
11742 {
11743 if (TYPE_MODE (type) == DCmode && align < 64)
11744 return 64;
11745 if (TYPE_MODE (type) == XCmode && align < 128)
11746 return 128;
11747 }
11748 else if ((TREE_CODE (type) == RECORD_TYPE
11749 || TREE_CODE (type) == UNION_TYPE
11750 || TREE_CODE (type) == QUAL_UNION_TYPE)
11751 && TYPE_FIELDS (type))
11752 {
11753 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11754 return 64;
11755 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11756 return 128;
11757 }
11758 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11759 || TREE_CODE (type) == INTEGER_TYPE)
11760 {
11761
11762 if (TYPE_MODE (type) == DFmode && align < 64)
11763 return 64;
11764 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11765 return 128;
11766 }
11767 return align;
11768 }
11769 \f
11770 /* Emit RTL insns to initialize the variable parts of a trampoline.
11771 FNADDR is an RTX for the address of the function's pure code.
11772 CXT is an RTX for the static chain value for the function. */
11773 void
11774 x86_initialize_trampoline (tramp, fnaddr, cxt)
11775 rtx tramp, fnaddr, cxt;
11776 {
11777 if (!TARGET_64BIT)
11778 {
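      /* The generated trampoline is 10 bytes (byte values are the opcodes
         written below):
             b9 <cxt>    movl  $CXT, %ecx
             e9 <disp>   jmp   FNADDR
         where <disp> is relative to the end of the trampoline.  */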
11779 /* Compute offset from the end of the jmp to the target function. */
11780 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11781 plus_constant (tramp, 10),
11782 NULL_RTX, 1, OPTAB_DIRECT);
11783 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11784 gen_int_mode (0xb9, QImode));
11785 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11786 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11787 gen_int_mode (0xe9, QImode));
11788 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11789 }
11790 else
11791 {
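      /* The generated trampoline loads the target address and the static
         chain into registers and jumps indirectly:
             41 bb <fnaddr32>   movl   $FNADDR, %r11d   (zero-extended form)
          or 49 bb <fnaddr64>   movabs $FNADDR, %r11
             49 ba <cxt64>      movabs $CXT, %r10
             49 ff e3           jmp    *%r11  */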
11792 int offset = 0;
11793       /* Try to load the address using a shorter movl instead of movabs.
11794          We may want to support movq for kernel mode, but the kernel does
11795          not use trampolines at the moment.  */
11796 if (x86_64_zero_extended_value (fnaddr))
11797 {
11798 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11799 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11800 gen_int_mode (0xbb41, HImode));
11801 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11802 gen_lowpart (SImode, fnaddr));
11803 offset += 6;
11804 }
11805 else
11806 {
11807 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11808 gen_int_mode (0xbb49, HImode));
11809 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11810 fnaddr);
11811 offset += 10;
11812 }
11813 /* Load static chain using movabs to r10. */
11814 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11815 gen_int_mode (0xba49, HImode));
11816 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11817 cxt);
11818 offset += 10;
11819       /* Jump to r11.  */
11820 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11821 gen_int_mode (0xff49, HImode));
11822 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11823 gen_int_mode (0xe3, QImode));
11824 offset += 3;
11825 if (offset > TRAMPOLINE_SIZE)
11826 abort ();
11827 }
11828
11829 #ifdef TRANSFER_FROM_TRAMPOLINE
11830 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
11831 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11832 #endif
11833 }
11834 \f
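/* Register a builtin function NAME of type TYPE expanding to CODE, but
   only when the target flag bits in MASK are enabled on the command line;
   otherwise the builtin is silently omitted.  */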
11835 #define def_builtin(MASK, NAME, TYPE, CODE) \
11836 do { \
11837 if ((MASK) & target_flags) \
11838 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11839 NULL, NULL_TREE); \
11840 } while (0)
11841
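/* Describes one MMX/SSE builtin that is expanded from a table entry: the
   target_flags mask that enables it, the insn pattern used to expand it,
   its user-visible name (0 for entries defined by hand below with more
   precise types), its IX86_BUILTIN_* code and, for the compare intrinsics,
   the rtx comparison code plus a flag noting that the operands must be
   swapped (GT/GE are implemented as swapped LT/LE).  */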
11842 struct builtin_description
11843 {
11844 const unsigned int mask;
11845 const enum insn_code icode;
11846 const char *const name;
11847 const enum ix86_builtins code;
11848 const enum rtx_code comparison;
11849 const unsigned int flag;
11850 };
11851
11852 /* Used for builtins that are enabled both by -msse and -msse2. */
11853 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11854
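/* Builtins that expand to the scalar compare-and-set-EFLAGS insns
   comiss/ucomiss (SSE) and comisd/ucomisd (SSE2).  */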
11855 static const struct builtin_description bdesc_comi[] =
11856 {
11857 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11858 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11859 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11860 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11861 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11862 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11863 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11864 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11865 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11866 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11867 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11868 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11869 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11870 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11871 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11872 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11873 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11874 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11875 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11876 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11877 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11878 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11879 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11880 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11881 };
11882
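/* Builtins taking two vector operands.  Entries with a null name are
   skipped by the registration loop in ix86_init_mmx_sse_builtins and are
   instead defined there by hand with more precise types.  */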
11883 static const struct builtin_description bdesc_2arg[] =
11884 {
11885 /* SSE */
11886 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11887 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11888 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11889 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11890 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11891 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11892 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11893 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11894
11895 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11896 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11897 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11898 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11899 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11900 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11901 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11902 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11903 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11904 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11905 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11906 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11907 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11908 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11909 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11910 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11911 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11912 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11913 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11914 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11915
11916 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11917 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11918 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11919 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11920
11921 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11922 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11923 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11924 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11925
11926 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11927 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11928 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11929 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11930 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11931
11932 /* MMX */
11933 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11934 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11935 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11936 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11937 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11938 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11939
11940 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11941 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11942 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11943 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11944 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11945 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11946 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11947 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11948
11949 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11950 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11951 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11952
11953 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11954 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11955 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11956 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11957
11958 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11959 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11960
11961 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11962 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11963 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11964 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11965 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11966 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11967
11968 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11969 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11970 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11971 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11972
11973 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11974 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11975 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11976 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11977 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11978 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11979
11980 /* Special. */
11981 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11982 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11983 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11984
11985 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11986 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11987
11988 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11989 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11990 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11991 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11992 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11993 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11994
11995 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11996 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11997 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11998 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11999 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12000 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12001
12002 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12003 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12004 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12005 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12006
12007 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12008 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12009
12010 /* SSE2 */
12011 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12012 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12013 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12014 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12015 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12016 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12017 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12018 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12019
12020 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12021 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12022 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12023 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12024 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12025 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12026 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12027 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12028 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12029 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12030 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12031 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12032 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12033 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12034 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12035 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12036 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12037 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12038 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12039 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12040
12041 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12042 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12043 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12044 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12045
12046 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12047 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12048 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12050
12051 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12052 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12053 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12054
12055 /* SSE2 MMX */
12056 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12057 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12058 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12059 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12060 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12061 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12062 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12063   { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12064
12065   { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12066   { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12067   { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12068   { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12069   { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12070   { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12071   { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12072   { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12073
12074 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12075 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12076 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12077 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12078
12079 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12080 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12081 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12082 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12083
12084 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12085 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12086
12087 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12088 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12090 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12091 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12092 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12093
12094 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12095 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12096 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12097 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12098
12099 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12100 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12101 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12102 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12103 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12104 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12105 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12107
12108 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12109 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12110 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12111
12112 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12113 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12114
12115 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12116 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12117 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12118 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12119 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12120 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12121
12122 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12124 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12125 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12126 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12128
12129 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12130 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12133
12134 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12135
12136 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12137 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12138 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12139 };
12140
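/* Builtins taking a single vector operand.  */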
12141 static const struct builtin_description bdesc_1arg[] =
12142 {
12143 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12144 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12145
12146 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12147 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12148 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12149
12150 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12151 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12152 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12153 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12154
12155 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12156 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12157 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12158 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12159
12160 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12161
12162 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12163 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12164
12165 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12166 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12167 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12168 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12170
12171 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12172
12173 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12174 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12175
12176 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12177 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12178 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12179
12180 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12181 };
12182
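/* Set up all the target-specific builtins; nothing is registered unless
   MMX support is enabled.  */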
12183 void
12184 ix86_init_builtins ()
12185 {
12186 if (TARGET_MMX)
12187 ix86_init_mmx_sse_builtins ();
12188 }
12189
12190 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12191    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
12192    builtins are defined.  */
12193 static void
12194 ix86_init_mmx_sse_builtins ()
12195 {
12196 const struct builtin_description * d;
12197 size_t i;
12198
12199 tree pchar_type_node = build_pointer_type (char_type_node);
12200 tree pfloat_type_node = build_pointer_type (float_type_node);
12201 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12202 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12203 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12204
12205 /* Comparisons. */
12206 tree int_ftype_v4sf_v4sf
12207 = build_function_type_list (integer_type_node,
12208 V4SF_type_node, V4SF_type_node, NULL_TREE);
12209 tree v4si_ftype_v4sf_v4sf
12210 = build_function_type_list (V4SI_type_node,
12211 V4SF_type_node, V4SF_type_node, NULL_TREE);
12212 /* MMX/SSE/integer conversions. */
12213 tree int_ftype_v4sf
12214 = build_function_type_list (integer_type_node,
12215 V4SF_type_node, NULL_TREE);
12216 tree int_ftype_v8qi
12217 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12218 tree v4sf_ftype_v4sf_int
12219 = build_function_type_list (V4SF_type_node,
12220 V4SF_type_node, integer_type_node, NULL_TREE);
12221 tree v4sf_ftype_v4sf_v2si
12222 = build_function_type_list (V4SF_type_node,
12223 V4SF_type_node, V2SI_type_node, NULL_TREE);
12224 tree int_ftype_v4hi_int
12225 = build_function_type_list (integer_type_node,
12226 V4HI_type_node, integer_type_node, NULL_TREE);
12227 tree v4hi_ftype_v4hi_int_int
12228 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12229 integer_type_node, integer_type_node,
12230 NULL_TREE);
12231 /* Miscellaneous. */
12232 tree v8qi_ftype_v4hi_v4hi
12233 = build_function_type_list (V8QI_type_node,
12234 V4HI_type_node, V4HI_type_node, NULL_TREE);
12235 tree v4hi_ftype_v2si_v2si
12236 = build_function_type_list (V4HI_type_node,
12237 V2SI_type_node, V2SI_type_node, NULL_TREE);
12238 tree v4sf_ftype_v4sf_v4sf_int
12239 = build_function_type_list (V4SF_type_node,
12240 V4SF_type_node, V4SF_type_node,
12241 integer_type_node, NULL_TREE);
12242 tree v2si_ftype_v4hi_v4hi
12243 = build_function_type_list (V2SI_type_node,
12244 V4HI_type_node, V4HI_type_node, NULL_TREE);
12245 tree v4hi_ftype_v4hi_int
12246 = build_function_type_list (V4HI_type_node,
12247 V4HI_type_node, integer_type_node, NULL_TREE);
12248 tree v4hi_ftype_v4hi_di
12249 = build_function_type_list (V4HI_type_node,
12250 V4HI_type_node, long_long_unsigned_type_node,
12251 NULL_TREE);
12252 tree v2si_ftype_v2si_di
12253 = build_function_type_list (V2SI_type_node,
12254 V2SI_type_node, long_long_unsigned_type_node,
12255 NULL_TREE);
12256 tree void_ftype_void
12257 = build_function_type (void_type_node, void_list_node);
12258 tree void_ftype_unsigned
12259 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12260 tree unsigned_ftype_void
12261 = build_function_type (unsigned_type_node, void_list_node);
12262 tree di_ftype_void
12263 = build_function_type (long_long_unsigned_type_node, void_list_node);
12264 tree v4sf_ftype_void
12265 = build_function_type (V4SF_type_node, void_list_node);
12266 tree v2si_ftype_v4sf
12267 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12268 /* Loads/stores. */
12269 tree void_ftype_v8qi_v8qi_pchar
12270 = build_function_type_list (void_type_node,
12271 V8QI_type_node, V8QI_type_node,
12272 pchar_type_node, NULL_TREE);
12273 tree v4sf_ftype_pfloat
12274 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12275 /* @@@ the type is bogus */
12276 tree v4sf_ftype_v4sf_pv2si
12277 = build_function_type_list (V4SF_type_node,
12278 V4SF_type_node, pv2si_type_node, NULL_TREE);
12279 tree void_ftype_pv2si_v4sf
12280 = build_function_type_list (void_type_node,
12281 pv2si_type_node, V4SF_type_node, NULL_TREE);
12282 tree void_ftype_pfloat_v4sf
12283 = build_function_type_list (void_type_node,
12284 pfloat_type_node, V4SF_type_node, NULL_TREE);
12285 tree void_ftype_pdi_di
12286 = build_function_type_list (void_type_node,
12287 pdi_type_node, long_long_unsigned_type_node,
12288 NULL_TREE);
12289 tree void_ftype_pv2di_v2di
12290 = build_function_type_list (void_type_node,
12291 pv2di_type_node, V2DI_type_node, NULL_TREE);
12292 /* Normal vector unops. */
12293 tree v4sf_ftype_v4sf
12294 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12295
12296 /* Normal vector binops. */
12297 tree v4sf_ftype_v4sf_v4sf
12298 = build_function_type_list (V4SF_type_node,
12299 V4SF_type_node, V4SF_type_node, NULL_TREE);
12300 tree v8qi_ftype_v8qi_v8qi
12301 = build_function_type_list (V8QI_type_node,
12302 V8QI_type_node, V8QI_type_node, NULL_TREE);
12303 tree v4hi_ftype_v4hi_v4hi
12304 = build_function_type_list (V4HI_type_node,
12305 V4HI_type_node, V4HI_type_node, NULL_TREE);
12306 tree v2si_ftype_v2si_v2si
12307 = build_function_type_list (V2SI_type_node,
12308 V2SI_type_node, V2SI_type_node, NULL_TREE);
12309 tree di_ftype_di_di
12310 = build_function_type_list (long_long_unsigned_type_node,
12311 long_long_unsigned_type_node,
12312 long_long_unsigned_type_node, NULL_TREE);
12313
12314 tree v2si_ftype_v2sf
12315 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12316 tree v2sf_ftype_v2si
12317 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12318 tree v2si_ftype_v2si
12319 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12320 tree v2sf_ftype_v2sf
12321 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12322 tree v2sf_ftype_v2sf_v2sf
12323 = build_function_type_list (V2SF_type_node,
12324 V2SF_type_node, V2SF_type_node, NULL_TREE);
12325 tree v2si_ftype_v2sf_v2sf
12326 = build_function_type_list (V2SI_type_node,
12327 V2SF_type_node, V2SF_type_node, NULL_TREE);
12328 tree pint_type_node = build_pointer_type (integer_type_node);
12329 tree pdouble_type_node = build_pointer_type (double_type_node);
12330 tree int_ftype_v2df_v2df
12331 = build_function_type_list (integer_type_node,
12332 V2DF_type_node, V2DF_type_node, NULL_TREE);
12333
12334 tree ti_ftype_void
12335 = build_function_type (intTI_type_node, void_list_node);
12336 tree v2di_ftype_void
12337 = build_function_type (V2DI_type_node, void_list_node);
12338 tree ti_ftype_ti_ti
12339 = build_function_type_list (intTI_type_node,
12340 intTI_type_node, intTI_type_node, NULL_TREE);
12341 tree void_ftype_pvoid
12342 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12343 tree v2di_ftype_di
12344 = build_function_type_list (V2DI_type_node,
12345 long_long_unsigned_type_node, NULL_TREE);
12346 tree di_ftype_v2di
12347 = build_function_type_list (long_long_unsigned_type_node,
12348 V2DI_type_node, NULL_TREE);
12349 tree v4sf_ftype_v4si
12350 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12351 tree v4si_ftype_v4sf
12352 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12353 tree v2df_ftype_v4si
12354 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12355 tree v4si_ftype_v2df
12356 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12357 tree v2si_ftype_v2df
12358 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12359 tree v4sf_ftype_v2df
12360 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12361 tree v2df_ftype_v2si
12362 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12363 tree v2df_ftype_v4sf
12364 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12365 tree int_ftype_v2df
12366 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12367 tree v2df_ftype_v2df_int
12368 = build_function_type_list (V2DF_type_node,
12369 V2DF_type_node, integer_type_node, NULL_TREE);
12370 tree v4sf_ftype_v4sf_v2df
12371 = build_function_type_list (V4SF_type_node,
12372 V4SF_type_node, V2DF_type_node, NULL_TREE);
12373 tree v2df_ftype_v2df_v4sf
12374 = build_function_type_list (V2DF_type_node,
12375 V2DF_type_node, V4SF_type_node, NULL_TREE);
12376 tree v2df_ftype_v2df_v2df_int
12377 = build_function_type_list (V2DF_type_node,
12378 V2DF_type_node, V2DF_type_node,
12379 integer_type_node,
12380 NULL_TREE);
12381 tree v2df_ftype_v2df_pv2si
12382 = build_function_type_list (V2DF_type_node,
12383 V2DF_type_node, pv2si_type_node, NULL_TREE);
12384 tree void_ftype_pv2si_v2df
12385 = build_function_type_list (void_type_node,
12386 pv2si_type_node, V2DF_type_node, NULL_TREE);
12387 tree void_ftype_pdouble_v2df
12388 = build_function_type_list (void_type_node,
12389 pdouble_type_node, V2DF_type_node, NULL_TREE);
12390 tree void_ftype_pint_int
12391 = build_function_type_list (void_type_node,
12392 pint_type_node, integer_type_node, NULL_TREE);
12393 tree void_ftype_v16qi_v16qi_pchar
12394 = build_function_type_list (void_type_node,
12395 V16QI_type_node, V16QI_type_node,
12396 pchar_type_node, NULL_TREE);
12397 tree v2df_ftype_pdouble
12398 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12399 tree v2df_ftype_v2df_v2df
12400 = build_function_type_list (V2DF_type_node,
12401 V2DF_type_node, V2DF_type_node, NULL_TREE);
12402 tree v16qi_ftype_v16qi_v16qi
12403 = build_function_type_list (V16QI_type_node,
12404 V16QI_type_node, V16QI_type_node, NULL_TREE);
12405 tree v8hi_ftype_v8hi_v8hi
12406 = build_function_type_list (V8HI_type_node,
12407 V8HI_type_node, V8HI_type_node, NULL_TREE);
12408 tree v4si_ftype_v4si_v4si
12409 = build_function_type_list (V4SI_type_node,
12410 V4SI_type_node, V4SI_type_node, NULL_TREE);
12411 tree v2di_ftype_v2di_v2di
12412 = build_function_type_list (V2DI_type_node,
12413 V2DI_type_node, V2DI_type_node, NULL_TREE);
12414 tree v2di_ftype_v2df_v2df
12415 = build_function_type_list (V2DI_type_node,
12416 V2DF_type_node, V2DF_type_node, NULL_TREE);
12417 tree v2df_ftype_v2df
12418 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12419 tree v2df_ftype_double
12420 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12421 tree v2df_ftype_double_double
12422 = build_function_type_list (V2DF_type_node,
12423 double_type_node, double_type_node, NULL_TREE);
12424 tree int_ftype_v8hi_int
12425 = build_function_type_list (integer_type_node,
12426 V8HI_type_node, integer_type_node, NULL_TREE);
12427 tree v8hi_ftype_v8hi_int_int
12428 = build_function_type_list (V8HI_type_node,
12429 V8HI_type_node, integer_type_node,
12430 integer_type_node, NULL_TREE);
12431 tree v2di_ftype_v2di_int
12432 = build_function_type_list (V2DI_type_node,
12433 V2DI_type_node, integer_type_node, NULL_TREE);
12434 tree v4si_ftype_v4si_int
12435 = build_function_type_list (V4SI_type_node,
12436 V4SI_type_node, integer_type_node, NULL_TREE);
12437 tree v8hi_ftype_v8hi_int
12438 = build_function_type_list (V8HI_type_node,
12439 V8HI_type_node, integer_type_node, NULL_TREE);
12440 tree v8hi_ftype_v8hi_v2di
12441 = build_function_type_list (V8HI_type_node,
12442 V8HI_type_node, V2DI_type_node, NULL_TREE);
12443 tree v4si_ftype_v4si_v2di
12444 = build_function_type_list (V4SI_type_node,
12445 V4SI_type_node, V2DI_type_node, NULL_TREE);
12446 tree v4si_ftype_v8hi_v8hi
12447 = build_function_type_list (V4SI_type_node,
12448 V8HI_type_node, V8HI_type_node, NULL_TREE);
12449 tree di_ftype_v8qi_v8qi
12450 = build_function_type_list (long_long_unsigned_type_node,
12451 V8QI_type_node, V8QI_type_node, NULL_TREE);
12452 tree v2di_ftype_v16qi_v16qi
12453 = build_function_type_list (V2DI_type_node,
12454 V16QI_type_node, V16QI_type_node, NULL_TREE);
12455 tree int_ftype_v16qi
12456 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12457 tree v16qi_ftype_pchar
12458 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12459 tree void_ftype_pchar_v16qi
12460 = build_function_type_list (void_type_node,
12461 pchar_type_node, V16QI_type_node, NULL_TREE);
12462 tree v4si_ftype_pchar
12463 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12464 tree void_ftype_pchar_v4si
12465 = build_function_type_list (void_type_node,
12466 pchar_type_node, V4SI_type_node, NULL_TREE);
12467 tree v2di_ftype_v2di
12468 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12469
12470 /* Add all builtins that are more or less simple operations on two
12471 operands. */
12472 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12473 {
12474 /* Use one of the operands; the target can have a different mode for
12475 mask-generating compares. */
12476 enum machine_mode mode;
12477 tree type;
12478
12479 if (d->name == 0)
12480 continue;
12481 mode = insn_data[d->icode].operand[1].mode;
12482
12483 switch (mode)
12484 {
12485 case V16QImode:
12486 type = v16qi_ftype_v16qi_v16qi;
12487 break;
12488 case V8HImode:
12489 type = v8hi_ftype_v8hi_v8hi;
12490 break;
12491 case V4SImode:
12492 type = v4si_ftype_v4si_v4si;
12493 break;
12494 case V2DImode:
12495 type = v2di_ftype_v2di_v2di;
12496 break;
12497 case V2DFmode:
12498 type = v2df_ftype_v2df_v2df;
12499 break;
12500 case TImode:
12501 type = ti_ftype_ti_ti;
12502 break;
12503 case V4SFmode:
12504 type = v4sf_ftype_v4sf_v4sf;
12505 break;
12506 case V8QImode:
12507 type = v8qi_ftype_v8qi_v8qi;
12508 break;
12509 case V4HImode:
12510 type = v4hi_ftype_v4hi_v4hi;
12511 break;
12512 case V2SImode:
12513 type = v2si_ftype_v2si_v2si;
12514 break;
12515 case DImode:
12516 type = di_ftype_di_di;
12517 break;
12518
12519 default:
12520 abort ();
12521 }
12522
12523 /* Override for comparisons. */
12524 if (d->icode == CODE_FOR_maskcmpv4sf3
12525 || d->icode == CODE_FOR_maskncmpv4sf3
12526 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12527 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12528 type = v4si_ftype_v4sf_v4sf;
12529
12530 if (d->icode == CODE_FOR_maskcmpv2df3
12531 || d->icode == CODE_FOR_maskncmpv2df3
12532 || d->icode == CODE_FOR_vmmaskcmpv2df3
12533 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12534 type = v2di_ftype_v2df_v2df;
12535
12536 def_builtin (d->mask, d->name, type, d->code);
12537 }
12538
12539 /* Add the remaining MMX insns with somewhat more complicated types. */
12540 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12541 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12542 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12543 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12544 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12545 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12546 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12547
12548 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12549 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12550 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12551
12552 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12553 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12554
12555 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12556 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12557
12558 /* comi/ucomi insns. */
12559 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12560 if (d->mask == MASK_SSE2)
12561 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12562 else
12563 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12564
12565 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12566 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12567 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12568
12569 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12570 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12571 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12572 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12573 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12574 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12575
12576 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12577 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12578
12579 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12580
12581 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12582 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12583 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12584 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12585 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12586 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12587
12588 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12589 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12590 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12591 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12592
12593 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12594 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12595 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12596 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12597
12598 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12599
12600 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12601
12602 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12603 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12604 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12605 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12606 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12607 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12608
12609 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12610
12611 /* Original 3DNow! */
12612 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12613 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12614 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12615 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12616 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12617 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12618 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12619 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12620 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12621 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12622 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12623 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12624 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12625 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12626 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12630 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12631 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12632
12633 /* 3DNow! extension as used in the Athlon CPU. */
12634 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12635 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12636 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12637 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12638 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12639 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12640
12641 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12642
12643 /* SSE2 */
12644 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12645 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12646
12647 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12648 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12649 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12650
12651 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12652 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12653 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12654 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12655 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12656 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12657
12658 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12659 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12660 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12661 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12662
12663 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12664 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12665 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12666 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12667 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12668
12669 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12670 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12671 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12672 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12673
12674 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12675 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12676
12677 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12678
12679 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12680 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12681
12682 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12683 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12684 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12685 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12686 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12687
12688 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12689
12690 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12691 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12692
12693 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12694 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12695 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12696
12697 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12698 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12699 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12700
12701 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12702 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12703 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12704 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12705 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12706 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12707 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12708
12709 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12710 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12711 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12712
12713 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
12714 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
12715 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
12716 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12717 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12718 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
12719 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12720
12721 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12722
12723 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12724 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12725 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12726
12727 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12728 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12729 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12730
12731 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12732 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12733
12734 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12735 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12736 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12737 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12738
12739 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12740 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12741 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12742 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12743
12744 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12745 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12746
12747 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12748 }
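
/* The builtins registered above are normally reached only through the
   wrapper functions in the <mmintrin.h>, <xmmintrin.h> and <emmintrin.h>
   headers rather than being called directly.  As a rough illustration
   (a sketch paraphrased from the header, not verbatim text from it), an
   unaligned SSE load is wrapped like this:  */
#if 0
static __inline __m128
_mm_loadu_ps (float const *__P)
{
  return (__m128) __builtin_ia32_loadups (__P);
}
#endif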
12749
12750 /* Errors in the source file can cause expand_expr to return const0_rtx
12751 where we expect a vector. To avoid crashing, use one of the vector
12752 clear instructions. */
12753 static rtx
12754 safe_vector_operand (x, mode)
12755 rtx x;
12756 enum machine_mode mode;
12757 {
12758 if (x != const0_rtx)
12759 return x;
12760 x = gen_reg_rtx (mode);
12761
12762 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12763 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12764 : gen_rtx_SUBREG (DImode, x, 0)));
12765 else
12766 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12767 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12768 return x;
12769 }
12770
12771 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12772
12773 static rtx
12774 ix86_expand_binop_builtin (icode, arglist, target)
12775 enum insn_code icode;
12776 tree arglist;
12777 rtx target;
12778 {
12779 rtx pat;
12780 tree arg0 = TREE_VALUE (arglist);
12781 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12782 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12783 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12784 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12785 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12786 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12787
12788 if (VECTOR_MODE_P (mode0))
12789 op0 = safe_vector_operand (op0, mode0);
12790 if (VECTOR_MODE_P (mode1))
12791 op1 = safe_vector_operand (op1, mode1);
12792
12793 if (! target
12794 || GET_MODE (target) != tmode
12795 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12796 target = gen_reg_rtx (tmode);
12797
12798 /* In case the insn wants input operands in modes different from
12799 the result, abort. */
12800 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12801 abort ();
12802
12803 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12804 op0 = copy_to_mode_reg (mode0, op0);
12805 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12806 op1 = copy_to_mode_reg (mode1, op1);
12807
12808 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12809 yet at most one of the two may be a memory operand. This is normally
12810 enforced by expanders, but we didn't bother to create one here. */
12811 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12812 op0 = copy_to_mode_reg (mode0, op0);
12813
12814 pat = GEN_FCN (icode) (target, op0, op1);
12815 if (! pat)
12816 return 0;
12817 emit_insn (pat);
12818 return target;
12819 }
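
/* The pattern above is shared by the table-driven expanders below: pull
   the arguments off the TREE_LIST with TREE_VALUE/TREE_CHAIN, look up the
   operand modes and predicates recorded in insn_data for the instruction
   pattern from i386.md, force any operand the predicate rejects into a
   fresh pseudo with copy_to_mode_reg, and finally emit the insn produced
   by GEN_FCN.  */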
12820
12821 /* Subroutine of ix86_expand_builtin to take care of stores. */
12822
12823 static rtx
12824 ix86_expand_store_builtin (icode, arglist)
12825 enum insn_code icode;
12826 tree arglist;
12827 {
12828 rtx pat;
12829 tree arg0 = TREE_VALUE (arglist);
12830 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12831 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12832 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12833 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12834 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12835
12836 if (VECTOR_MODE_P (mode1))
12837 op1 = safe_vector_operand (op1, mode1);
12838
12839 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12840
12841 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12842 op1 = copy_to_mode_reg (mode1, op1);
12843
12844 pat = GEN_FCN (icode) (op0, op1);
12845 if (pat)
12846 emit_insn (pat);
12847 return 0;
12848 }
12849
12850 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12851
12852 static rtx
12853 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12854 enum insn_code icode;
12855 tree arglist;
12856 rtx target;
12857 int do_load;
12858 {
12859 rtx pat;
12860 tree arg0 = TREE_VALUE (arglist);
12861 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12862 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12863 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12864
12865 if (! target
12866 || GET_MODE (target) != tmode
12867 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12868 target = gen_reg_rtx (tmode);
12869 if (do_load)
12870 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12871 else
12872 {
12873 if (VECTOR_MODE_P (mode0))
12874 op0 = safe_vector_operand (op0, mode0);
12875
12876 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12877 op0 = copy_to_mode_reg (mode0, op0);
12878 }
12879
12880 pat = GEN_FCN (icode) (target, op0);
12881 if (! pat)
12882 return 0;
12883 emit_insn (pat);
12884 return target;
12885 }
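
/* DO_LOAD distinguishes the two ways the unop expander is used: when it
   is nonzero the single argument is a pointer and is wrapped in a MEM so
   the insn performs the load itself (the __builtin_ia32_load* cases);
   when it is zero the argument is already a vector value.  */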
12886
12887 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12888 sqrtss, rsqrtss, rcpss. */
12889
12890 static rtx
12891 ix86_expand_unop1_builtin (icode, arglist, target)
12892 enum insn_code icode;
12893 tree arglist;
12894 rtx target;
12895 {
12896 rtx pat;
12897 tree arg0 = TREE_VALUE (arglist);
12898 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12899 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12900 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12901
12902 if (! target
12903 || GET_MODE (target) != tmode
12904 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12905 target = gen_reg_rtx (tmode);
12906
12907 if (VECTOR_MODE_P (mode0))
12908 op0 = safe_vector_operand (op0, mode0);
12909
12910 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12911 op0 = copy_to_mode_reg (mode0, op0);
12912
12913 op1 = op0;
12914 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12915 op1 = copy_to_mode_reg (mode0, op1);
12916
12917 pat = GEN_FCN (icode) (target, op0, op1);
12918 if (! pat)
12919 return 0;
12920 emit_insn (pat);
12921 return target;
12922 }
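
/* Passing the same register for both inputs above is deliberate: the
   scalar "vm" patterns (sqrtss and friends) take two operands because one
   supplies the value whose low element is operated on and the other
   supplies the elements that pass through unchanged.  Duplicating op0
   makes the insn behave like the one-argument intrinsic, computing the
   operation on the low element and copying the rest from the same
   input.  */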
12923
12924 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12925
12926 static rtx
12927 ix86_expand_sse_compare (d, arglist, target)
12928 const struct builtin_description *d;
12929 tree arglist;
12930 rtx target;
12931 {
12932 rtx pat;
12933 tree arg0 = TREE_VALUE (arglist);
12934 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12935 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12936 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12937 rtx op2;
12938 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12939 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12940 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12941 enum rtx_code comparison = d->comparison;
12942
12943 if (VECTOR_MODE_P (mode0))
12944 op0 = safe_vector_operand (op0, mode0);
12945 if (VECTOR_MODE_P (mode1))
12946 op1 = safe_vector_operand (op1, mode1);
12947
12948 /* Swap operands if we have a comparison that isn't available in
12949 hardware. */
12950 if (d->flag)
12951 {
12952 rtx tmp = gen_reg_rtx (mode1);
12953 emit_move_insn (tmp, op1);
12954 op1 = op0;
12955 op0 = tmp;
12956 }
12957
12958 if (! target
12959 || GET_MODE (target) != tmode
12960 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12961 target = gen_reg_rtx (tmode);
12962
12963 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12964 op0 = copy_to_mode_reg (mode0, op0);
12965 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12966 op1 = copy_to_mode_reg (mode1, op1);
12967
12968 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12969 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12970 if (! pat)
12971 return 0;
12972 emit_insn (pat);
12973 return target;
12974 }
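
/* The d->flag test above handles predicates the hardware lacks: SSE
   cmpps/cmpss only encode EQ, LT, LE, UNORD and their negations, so a GT
   or GE comparison is expanded by swapping the two operands and emitting
   the LT or LE form instead (a > b becomes b < a).  */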
12975
12976 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12977
12978 static rtx
12979 ix86_expand_sse_comi (d, arglist, target)
12980 const struct builtin_description *d;
12981 tree arglist;
12982 rtx target;
12983 {
12984 rtx pat;
12985 tree arg0 = TREE_VALUE (arglist);
12986 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12987 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12988 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12989 rtx op2;
12990 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12991 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12992 enum rtx_code comparison = d->comparison;
12993
12994 if (VECTOR_MODE_P (mode0))
12995 op0 = safe_vector_operand (op0, mode0);
12996 if (VECTOR_MODE_P (mode1))
12997 op1 = safe_vector_operand (op1, mode1);
12998
12999 /* Swap operands if we have a comparison that isn't available in
13000 hardware. */
13001 if (d->flag)
13002 {
13003 rtx tmp = op1;
13004 op1 = op0;
13005 op0 = tmp;
13006 }
13007
13008 target = gen_reg_rtx (SImode);
13009 emit_move_insn (target, const0_rtx);
13010 target = gen_rtx_SUBREG (QImode, target, 0);
13011
13012 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13013 op0 = copy_to_mode_reg (mode0, op0);
13014 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13015 op1 = copy_to_mode_reg (mode1, op1);
13016
13017 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13018 pat = GEN_FCN (d->icode) (op0, op1);
13019 if (! pat)
13020 return 0;
13021 emit_insn (pat);
13022 emit_insn (gen_rtx_SET (VOIDmode,
13023 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13024 gen_rtx_fmt_ee (comparison, QImode,
13025 SET_DEST (pat),
13026 const0_rtx)));
13027
13028 return SUBREG_REG (target);
13029 }
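
/* The comi expansion emits the comparison itself (which only sets the
   flags), then materializes the truth value by storing a setcc-style
   result into the low QImode part of a zeroed SImode register, and that
   SImode register is what gets returned to the caller.  */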
13030
13031 /* Expand an expression EXP that calls a built-in function,
13032 with result going to TARGET if that's convenient
13033 (and in mode MODE if that's convenient).
13034 SUBTARGET may be used as the target for computing one of EXP's operands.
13035 IGNORE is nonzero if the value is to be ignored. */
13036
13037 rtx
13038 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13039 tree exp;
13040 rtx target;
13041 rtx subtarget ATTRIBUTE_UNUSED;
13042 enum machine_mode mode ATTRIBUTE_UNUSED;
13043 int ignore ATTRIBUTE_UNUSED;
13044 {
13045 const struct builtin_description *d;
13046 size_t i;
13047 enum insn_code icode;
13048 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13049 tree arglist = TREE_OPERAND (exp, 1);
13050 tree arg0, arg1, arg2;
13051 rtx op0, op1, op2, pat;
13052 enum machine_mode tmode, mode0, mode1, mode2;
13053 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13054
13055 switch (fcode)
13056 {
13057 case IX86_BUILTIN_EMMS:
13058 emit_insn (gen_emms ());
13059 return 0;
13060
13061 case IX86_BUILTIN_SFENCE:
13062 emit_insn (gen_sfence ());
13063 return 0;
13064
13065 case IX86_BUILTIN_PEXTRW:
13066 case IX86_BUILTIN_PEXTRW128:
13067 icode = (fcode == IX86_BUILTIN_PEXTRW
13068 ? CODE_FOR_mmx_pextrw
13069 : CODE_FOR_sse2_pextrw);
13070 arg0 = TREE_VALUE (arglist);
13071 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13072 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13073 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13074 tmode = insn_data[icode].operand[0].mode;
13075 mode0 = insn_data[icode].operand[1].mode;
13076 mode1 = insn_data[icode].operand[2].mode;
13077
13078 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13079 op0 = copy_to_mode_reg (mode0, op0);
13080 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13081 {
13082 /* @@@ better error message */
13083 error ("selector must be an immediate");
13084 return gen_reg_rtx (tmode);
13085 }
13086 if (target == 0
13087 || GET_MODE (target) != tmode
13088 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13089 target = gen_reg_rtx (tmode);
13090 pat = GEN_FCN (icode) (target, op0, op1);
13091 if (! pat)
13092 return 0;
13093 emit_insn (pat);
13094 return target;
13095
13096 case IX86_BUILTIN_PINSRW:
13097 case IX86_BUILTIN_PINSRW128:
13098 icode = (fcode == IX86_BUILTIN_PINSRW
13099 ? CODE_FOR_mmx_pinsrw
13100 : CODE_FOR_sse2_pinsrw);
13101 arg0 = TREE_VALUE (arglist);
13102 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13103 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13104 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13105 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13106 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13107 tmode = insn_data[icode].operand[0].mode;
13108 mode0 = insn_data[icode].operand[1].mode;
13109 mode1 = insn_data[icode].operand[2].mode;
13110 mode2 = insn_data[icode].operand[3].mode;
13111
13112 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13113 op0 = copy_to_mode_reg (mode0, op0);
13114 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13115 op1 = copy_to_mode_reg (mode1, op1);
13116 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13117 {
13118 /* @@@ better error message */
13119 error ("selector must be an immediate");
13120 return const0_rtx;
13121 }
13122 if (target == 0
13123 || GET_MODE (target) != tmode
13124 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13125 target = gen_reg_rtx (tmode);
13126 pat = GEN_FCN (icode) (target, op0, op1, op2);
13127 if (! pat)
13128 return 0;
13129 emit_insn (pat);
13130 return target;
13131
13132 case IX86_BUILTIN_MASKMOVQ:
13133 case IX86_BUILTIN_MASKMOVDQU:
13134 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13135 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13136 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13137 : CODE_FOR_sse2_maskmovdqu));
13138 /* Note the arg order is different from the operand order. */
13139 arg1 = TREE_VALUE (arglist);
13140 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13141 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13142 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13143 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13144 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13145 mode0 = insn_data[icode].operand[0].mode;
13146 mode1 = insn_data[icode].operand[1].mode;
13147 mode2 = insn_data[icode].operand[2].mode;
13148
13149 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13150 op0 = copy_to_mode_reg (mode0, op0);
13151 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13152 op1 = copy_to_mode_reg (mode1, op1);
13153 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13154 op2 = copy_to_mode_reg (mode2, op2);
13155 pat = GEN_FCN (icode) (op0, op1, op2);
13156 if (! pat)
13157 return 0;
13158 emit_insn (pat);
13159 return 0;
13160
13161 case IX86_BUILTIN_SQRTSS:
13162 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13163 case IX86_BUILTIN_RSQRTSS:
13164 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13165 case IX86_BUILTIN_RCPSS:
13166 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13167
13168 case IX86_BUILTIN_LOADAPS:
13169 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13170
13171 case IX86_BUILTIN_LOADUPS:
13172 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13173
13174 case IX86_BUILTIN_STOREAPS:
13175 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13176
13177 case IX86_BUILTIN_STOREUPS:
13178 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13179
13180 case IX86_BUILTIN_LOADSS:
13181 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13182
13183 case IX86_BUILTIN_STORESS:
13184 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13185
13186 case IX86_BUILTIN_LOADHPS:
13187 case IX86_BUILTIN_LOADLPS:
13188 case IX86_BUILTIN_LOADHPD:
13189 case IX86_BUILTIN_LOADLPD:
13190 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13191 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13192 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13193 : CODE_FOR_sse2_movlpd);
13194 arg0 = TREE_VALUE (arglist);
13195 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13196 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13197 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13198 tmode = insn_data[icode].operand[0].mode;
13199 mode0 = insn_data[icode].operand[1].mode;
13200 mode1 = insn_data[icode].operand[2].mode;
13201
13202 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13203 op0 = copy_to_mode_reg (mode0, op0);
13204 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13205 if (target == 0
13206 || GET_MODE (target) != tmode
13207 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13208 target = gen_reg_rtx (tmode);
13209 pat = GEN_FCN (icode) (target, op0, op1);
13210 if (! pat)
13211 return 0;
13212 emit_insn (pat);
13213 return target;
13214
13215 case IX86_BUILTIN_STOREHPS:
13216 case IX86_BUILTIN_STORELPS:
13217 case IX86_BUILTIN_STOREHPD:
13218 case IX86_BUILTIN_STORELPD:
13219 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13220 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13221 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13222 : CODE_FOR_sse2_movlpd);
13223 arg0 = TREE_VALUE (arglist);
13224 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13225 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13226 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13227 mode0 = insn_data[icode].operand[1].mode;
13228 mode1 = insn_data[icode].operand[2].mode;
13229
13230 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13231 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13232 op1 = copy_to_mode_reg (mode1, op1);
13233
13234 pat = GEN_FCN (icode) (op0, op0, op1);
13235 if (! pat)
13236 return 0;
13237 emit_insn (pat);
13238 return 0;
13239
13240 case IX86_BUILTIN_MOVNTPS:
13241 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13242 case IX86_BUILTIN_MOVNTQ:
13243 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13244
13245 case IX86_BUILTIN_LDMXCSR:
13246 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13247 target = assign_386_stack_local (SImode, 0);
13248 emit_move_insn (target, op0);
13249 emit_insn (gen_ldmxcsr (target));
13250 return 0;
13251
13252 case IX86_BUILTIN_STMXCSR:
13253 target = assign_386_stack_local (SImode, 0);
13254 emit_insn (gen_stmxcsr (target));
13255 return copy_to_mode_reg (SImode, target);
13256
13257 case IX86_BUILTIN_SHUFPS:
13258 case IX86_BUILTIN_SHUFPD:
13259 icode = (fcode == IX86_BUILTIN_SHUFPS
13260 ? CODE_FOR_sse_shufps
13261 : CODE_FOR_sse2_shufpd);
13262 arg0 = TREE_VALUE (arglist);
13263 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13264 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13265 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13266 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13267 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13268 tmode = insn_data[icode].operand[0].mode;
13269 mode0 = insn_data[icode].operand[1].mode;
13270 mode1 = insn_data[icode].operand[2].mode;
13271 mode2 = insn_data[icode].operand[3].mode;
13272
13273 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13274 op0 = copy_to_mode_reg (mode0, op0);
13275 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13276 op1 = copy_to_mode_reg (mode1, op1);
13277 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13278 {
13279 /* @@@ better error message */
13280 error ("mask must be an immediate");
13281 return gen_reg_rtx (tmode);
13282 }
13283 if (target == 0
13284 || GET_MODE (target) != tmode
13285 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13286 target = gen_reg_rtx (tmode);
13287 pat = GEN_FCN (icode) (target, op0, op1, op2);
13288 if (! pat)
13289 return 0;
13290 emit_insn (pat);
13291 return target;
13292
13293 case IX86_BUILTIN_PSHUFW:
13294 case IX86_BUILTIN_PSHUFD:
13295 case IX86_BUILTIN_PSHUFHW:
13296 case IX86_BUILTIN_PSHUFLW:
13297 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13298 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13299 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13300 : CODE_FOR_mmx_pshufw);
13301 arg0 = TREE_VALUE (arglist);
13302 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13303 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13304 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13305 tmode = insn_data[icode].operand[0].mode;
13306 mode1 = insn_data[icode].operand[1].mode;
13307 mode2 = insn_data[icode].operand[2].mode;
13308
13309 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13310 op0 = copy_to_mode_reg (mode1, op0);
13311 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13312 {
13313 /* @@@ better error message */
13314 error ("mask must be an immediate");
13315 return const0_rtx;
13316 }
13317 if (target == 0
13318 || GET_MODE (target) != tmode
13319 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13320 target = gen_reg_rtx (tmode);
13321 pat = GEN_FCN (icode) (target, op0, op1);
13322 if (! pat)
13323 return 0;
13324 emit_insn (pat);
13325 return target;
13326
13327 case IX86_BUILTIN_PSLLDQI128:
13328 case IX86_BUILTIN_PSRLDQI128:
13329 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13330 : CODE_FOR_sse2_lshrti3);
13331 arg0 = TREE_VALUE (arglist);
13332 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13333 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13334 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13335 tmode = insn_data[icode].operand[0].mode;
13336 mode1 = insn_data[icode].operand[1].mode;
13337 mode2 = insn_data[icode].operand[2].mode;
13338
13339 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13340 {
13341 op0 = copy_to_reg (op0);
13342 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13343 }
13344 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13345 {
13346 error ("shift must be an immediate");
13347 return const0_rtx;
13348 }
13349 target = gen_reg_rtx (V2DImode);
13350 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13351 if (! pat)
13352 return 0;
13353 emit_insn (pat);
13354 return target;
13355
13356 case IX86_BUILTIN_FEMMS:
13357 emit_insn (gen_femms ());
13358 return NULL_RTX;
13359
13360 case IX86_BUILTIN_PAVGUSB:
13361 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13362
13363 case IX86_BUILTIN_PF2ID:
13364 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13365
13366 case IX86_BUILTIN_PFACC:
13367 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13368
13369 case IX86_BUILTIN_PFADD:
13370 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13371
13372 case IX86_BUILTIN_PFCMPEQ:
13373 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13374
13375 case IX86_BUILTIN_PFCMPGE:
13376 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13377
13378 case IX86_BUILTIN_PFCMPGT:
13379 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13380
13381 case IX86_BUILTIN_PFMAX:
13382 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13383
13384 case IX86_BUILTIN_PFMIN:
13385 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13386
13387 case IX86_BUILTIN_PFMUL:
13388 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13389
13390 case IX86_BUILTIN_PFRCP:
13391 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13392
13393 case IX86_BUILTIN_PFRCPIT1:
13394 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13395
13396 case IX86_BUILTIN_PFRCPIT2:
13397 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13398
13399 case IX86_BUILTIN_PFRSQIT1:
13400 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13401
13402 case IX86_BUILTIN_PFRSQRT:
13403 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13404
13405 case IX86_BUILTIN_PFSUB:
13406 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13407
13408 case IX86_BUILTIN_PFSUBR:
13409 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13410
13411 case IX86_BUILTIN_PI2FD:
13412 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13413
13414 case IX86_BUILTIN_PMULHRW:
13415 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13416
13417 case IX86_BUILTIN_PF2IW:
13418 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13419
13420 case IX86_BUILTIN_PFNACC:
13421 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13422
13423 case IX86_BUILTIN_PFPNACC:
13424 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13425
13426 case IX86_BUILTIN_PI2FW:
13427 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13428
13429 case IX86_BUILTIN_PSWAPDSI:
13430 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13431
13432 case IX86_BUILTIN_PSWAPDSF:
13433 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13434
13435 case IX86_BUILTIN_SSE_ZERO:
13436 target = gen_reg_rtx (V4SFmode);
13437 emit_insn (gen_sse_clrv4sf (target));
13438 return target;
13439
13440 case IX86_BUILTIN_MMX_ZERO:
13441 target = gen_reg_rtx (DImode);
13442 emit_insn (gen_mmx_clrdi (target));
13443 return target;
13444
13445 case IX86_BUILTIN_CLRTI:
13446 target = gen_reg_rtx (V2DImode);
13447 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13448 return target;
13449
13450
13451 case IX86_BUILTIN_SQRTSD:
13452 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13453 case IX86_BUILTIN_LOADAPD:
13454 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13455 case IX86_BUILTIN_LOADUPD:
13456 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13457
13458 case IX86_BUILTIN_STOREAPD:
13459 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13460 case IX86_BUILTIN_STOREUPD:
13461 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13462
13463 case IX86_BUILTIN_LOADSD:
13464 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13465
13466 case IX86_BUILTIN_STORESD:
13467 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13468
13469 case IX86_BUILTIN_SETPD1:
13470 target = assign_386_stack_local (DFmode, 0);
13471 arg0 = TREE_VALUE (arglist);
13472 emit_move_insn (adjust_address (target, DFmode, 0),
13473 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13474 op0 = gen_reg_rtx (V2DFmode);
13475 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13476 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13477 return op0;
13478
13479 case IX86_BUILTIN_SETPD:
13480 target = assign_386_stack_local (V2DFmode, 0);
13481 arg0 = TREE_VALUE (arglist);
13482 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13483 emit_move_insn (adjust_address (target, DFmode, 0),
13484 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13485 emit_move_insn (adjust_address (target, DFmode, 8),
13486 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13487 op0 = gen_reg_rtx (V2DFmode);
13488 emit_insn (gen_sse2_movapd (op0, target));
13489 return op0;
13490
13491 case IX86_BUILTIN_LOADRPD:
13492 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13493 gen_reg_rtx (V2DFmode), 1);
13494 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13495 return target;
13496
13497 case IX86_BUILTIN_LOADPD1:
13498 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13499 gen_reg_rtx (V2DFmode), 1);
13500 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13501 return target;
13502
13503 case IX86_BUILTIN_STOREPD1:
13504 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13505 case IX86_BUILTIN_STORERPD:
13506 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13507
13508 case IX86_BUILTIN_CLRPD:
13509 target = gen_reg_rtx (V2DFmode);
13510 emit_insn (gen_sse_clrv2df (target));
13511 return target;
13512
13513 case IX86_BUILTIN_MFENCE:
13514 emit_insn (gen_sse2_mfence ());
13515 return 0;
13516 case IX86_BUILTIN_LFENCE:
13517 emit_insn (gen_sse2_lfence ());
13518 return 0;
13519
13520 case IX86_BUILTIN_CLFLUSH:
13521 arg0 = TREE_VALUE (arglist);
13522 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13523 icode = CODE_FOR_sse2_clflush;
13524 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13525 op0 = copy_to_mode_reg (Pmode, op0);
13526
13527 emit_insn (gen_sse2_clflush (op0));
13528 return 0;
13529
13530 case IX86_BUILTIN_MOVNTPD:
13531 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13532 case IX86_BUILTIN_MOVNTDQ:
13533 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13534 case IX86_BUILTIN_MOVNTI:
13535 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13536
13537 case IX86_BUILTIN_LOADDQA:
13538 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13539 case IX86_BUILTIN_LOADDQU:
13540 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13541 case IX86_BUILTIN_LOADD:
13542 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13543
13544 case IX86_BUILTIN_STOREDQA:
13545 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13546 case IX86_BUILTIN_STOREDQU:
13547 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13548 case IX86_BUILTIN_STORED:
13549 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13550
13551 default:
13552 break;
13553 }
13554
13555 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13556 if (d->code == fcode)
13557 {
13558 /* Compares are treated specially. */
13559 if (d->icode == CODE_FOR_maskcmpv4sf3
13560 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13561 || d->icode == CODE_FOR_maskncmpv4sf3
13562 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13563 || d->icode == CODE_FOR_maskcmpv2df3
13564 || d->icode == CODE_FOR_vmmaskcmpv2df3
13565 || d->icode == CODE_FOR_maskncmpv2df3
13566 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13567 return ix86_expand_sse_compare (d, arglist, target);
13568
13569 return ix86_expand_binop_builtin (d->icode, arglist, target);
13570 }
13571
13572 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13573 if (d->code == fcode)
13574 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13575
13576 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13577 if (d->code == fcode)
13578 return ix86_expand_sse_comi (d, arglist, target);
13579
13580 /* @@@ Should really do something sensible here. */
13581 return 0;
13582 }
13583
13584 /* Store OPERAND to memory after reload is completed. This means
13585 that we can't easily use assign_stack_local. */
13586 rtx
13587 ix86_force_to_memory (mode, operand)
13588 enum machine_mode mode;
13589 rtx operand;
13590 {
13591 rtx result;
13592 if (!reload_completed)
13593 abort ();
13594 if (TARGET_64BIT && TARGET_RED_ZONE)
13595 {
13596 result = gen_rtx_MEM (mode,
13597 gen_rtx_PLUS (Pmode,
13598 stack_pointer_rtx,
13599 GEN_INT (-RED_ZONE_SIZE)));
13600 emit_move_insn (result, operand);
13601 }
13602 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13603 {
13604 switch (mode)
13605 {
13606 case HImode:
13607 case SImode:
13608 operand = gen_lowpart (DImode, operand);
13609 /* FALLTHRU */
13610 case DImode:
13611 emit_insn (
13612 gen_rtx_SET (VOIDmode,
13613 gen_rtx_MEM (DImode,
13614 gen_rtx_PRE_DEC (DImode,
13615 stack_pointer_rtx)),
13616 operand));
13617 break;
13618 default:
13619 abort ();
13620 }
13621 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13622 }
13623 else
13624 {
13625 switch (mode)
13626 {
13627 case DImode:
13628 {
13629 rtx operands[2];
13630 split_di (&operand, 1, operands, operands + 1);
13631 emit_insn (
13632 gen_rtx_SET (VOIDmode,
13633 gen_rtx_MEM (SImode,
13634 gen_rtx_PRE_DEC (Pmode,
13635 stack_pointer_rtx)),
13636 operands[1]));
13637 emit_insn (
13638 gen_rtx_SET (VOIDmode,
13639 gen_rtx_MEM (SImode,
13640 gen_rtx_PRE_DEC (Pmode,
13641 stack_pointer_rtx)),
13642 operands[0]));
13643 }
13644 break;
13645 case HImode:
13646 /* It is better to store HImodes as SImodes. */
13647 if (!TARGET_PARTIAL_REG_STALL)
13648 operand = gen_lowpart (SImode, operand);
13649 /* FALLTHRU */
13650 case SImode:
13651 emit_insn (
13652 gen_rtx_SET (VOIDmode,
13653 gen_rtx_MEM (GET_MODE (operand),
13654 gen_rtx_PRE_DEC (SImode,
13655 stack_pointer_rtx)),
13656 operand));
13657 break;
13658 default:
13659 abort ();
13660 }
13661 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13662 }
13663 return result;
13664 }
13665
13666 /* Free the operand from memory. */
13667 void
13668 ix86_free_from_memory (mode)
13669 enum machine_mode mode;
13670 {
13671 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13672 {
13673 int size;
13674
13675 if (mode == DImode || TARGET_64BIT)
13676 size = 8;
13677 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13678 size = 2;
13679 else
13680 size = 4;
13681 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13682 to a pop or add instruction if registers are available. */
13683 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13684 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13685 GEN_INT (size))));
13686 }
13687 }
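
/* ix86_force_to_memory and ix86_free_from_memory are meant to be used as
   a pair around an instruction that needs a memory operand after reload,
   when a stack slot can no longer be allocated the usual way.  A minimal
   usage sketch (gen_use_mem_operand is hypothetical, not a pattern from
   i386.md):  */
#if 0
  /* Sketch only; gen_use_mem_operand is a hypothetical consumer.  */
  rtx mem = ix86_force_to_memory (DImode, operand);
  emit_insn (gen_use_mem_operand (target, mem));
  ix86_free_from_memory (DImode);
#endif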
13688
13689 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13690 QImode must go into class Q_REGS.
13691 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
13692 movdf to do mem-to-mem moves through integer regs. */
13693 enum reg_class
13694 ix86_preferred_reload_class (x, class)
13695 rtx x;
13696 enum reg_class class;
13697 {
13698 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13699 return NO_REGS;
13700 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13701 {
13702 /* SSE can't load any constant directly yet. */
13703 if (SSE_CLASS_P (class))
13704 return NO_REGS;
13705 /* Floats can load 0 and 1. */
13706 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13707 {
13708 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13709 if (MAYBE_SSE_CLASS_P (class))
13710 return (reg_class_subset_p (class, GENERAL_REGS)
13711 ? GENERAL_REGS : FLOAT_REGS);
13712 else
13713 return class;
13714 }
13715 /* General regs can load everything. */
13716 if (reg_class_subset_p (class, GENERAL_REGS))
13717 return GENERAL_REGS;
13718 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13719 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13720 return NO_REGS;
13721 }
13722 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13723 return NO_REGS;
13724 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13725 return Q_REGS;
13726 return class;
13727 }
13728
13729 /* If we are copying between general and FP registers, we need a memory
13730 location. The same is true for SSE and MMX registers.
13731
13732 The macro can't work reliably when one of the CLASSES is a class containing
13733 registers from multiple units (SSE, MMX, integer). We avoid this by never
13734 combining those units in a single alternative in the machine description.
13735 Ensure that this constraint holds to avoid unexpected surprises.
13736
13737 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13738 enforce these sanity checks. */
13739 int
13740 ix86_secondary_memory_needed (class1, class2, mode, strict)
13741 enum reg_class class1, class2;
13742 enum machine_mode mode;
13743 int strict;
13744 {
13745 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13746 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13747 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13748 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13749 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13750 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13751 {
13752 if (strict)
13753 abort ();
13754 else
13755 return 1;
13756 }
13757 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13758 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13759 && (mode) != SImode)
13760 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13761 && (mode) != SImode));
13762 }
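
/* In other words, secondary memory is needed whenever a move would have
   to cross register files: x87 <-> integer always goes through memory,
   and SSE <-> integer or MMX <-> integer does too, except for SImode
   where a direct movd between the units exists.  */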
13763 /* Return the cost of moving data from a register in class CLASS1 to
13764 one in class CLASS2.
13765
13766 It is not required that the cost always equal 2 when FROM is the same as TO;
13767 on some machines it is expensive to move between registers if they are not
13768 general registers. */
13769 int
13770 ix86_register_move_cost (mode, class1, class2)
13771 enum machine_mode mode;
13772 enum reg_class class1, class2;
13773 {
13774 /* In case we require secondary memory, compute the cost of the store
13775 followed by the load. To avoid bad register allocation choices, this
13776 needs to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13777
13778 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13779 {
13780 int cost = 1;
13781
13782 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13783 MEMORY_MOVE_COST (mode, class1, 1));
13784 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13785 MEMORY_MOVE_COST (mode, class2, 1));
13786
13787 /* When copying from a general purpose register we may emit multiple
13788 stores followed by a single load, causing a memory size mismatch stall.
13789 Count this as an arbitrarily high cost of 20. */
13790 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13791 cost += 20;
13792
13793 /* In the case of FP/MMX moves, the registers actually overlap, and we
13794 have to switch modes in order to treat them differently. */
13795 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13796 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13797 cost += 20;
13798
13799 return cost;
13800 }
13801
13802 /* Moves between SSE/MMX and integer unit are expensive. */
13803 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13804 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13805 return ix86_cost->mmxsse_to_integer;
13806 if (MAYBE_FLOAT_CLASS_P (class1))
13807 return ix86_cost->fp_move;
13808 if (MAYBE_SSE_CLASS_P (class1))
13809 return ix86_cost->sse_move;
13810 if (MAYBE_MMX_CLASS_P (class1))
13811 return ix86_cost->mmx_move;
13812 return 2;
13813 }
13814
13815 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13816 int
13817 ix86_hard_regno_mode_ok (regno, mode)
13818 int regno;
13819 enum machine_mode mode;
13820 {
13821 /* Flags, and only flags, can hold CCmode values, and they can hold nothing else. */
13822 if (CC_REGNO_P (regno))
13823 return GET_MODE_CLASS (mode) == MODE_CC;
13824 if (GET_MODE_CLASS (mode) == MODE_CC
13825 || GET_MODE_CLASS (mode) == MODE_RANDOM
13826 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13827 return 0;
13828 if (FP_REGNO_P (regno))
13829 return VALID_FP_MODE_P (mode);
13830 if (SSE_REGNO_P (regno))
13831 return VALID_SSE_REG_MODE (mode);
13832 if (MMX_REGNO_P (regno))
13833 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13834 /* We handle both integers and floats in the general purpose registers.
13835 In the future we should be able to handle vector modes as well. */
13836 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13837 return 0;
13838 /* Take care with QImode values - they can be in non-QI regs, but then
13839 they do cause partial register stalls. */
13840 if (regno < 4 || mode != QImode || TARGET_64BIT)
13841 return 1;
13842 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13843 }
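
/* The regno < 4 test above reflects that only %eax, %edx, %ecx and %ebx
   have addressable QImode low parts in 32-bit mode; with a REX prefix in
   64-bit mode every general purpose register does, hence the
   TARGET_64BIT escape.  */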
13844
13845 /* Return the cost of moving data of mode M between a
13846 register and memory. A value of 2 is the default; this cost is
13847 relative to those in `REGISTER_MOVE_COST'.
13848
13849 If moving between registers and memory is more expensive than
13850 between two registers, you should define this macro to express the
13851 relative cost.
13852
13853 Also model the increased cost of moving QImode registers in
13854 non-Q_REGS classes.
13855 */
13856 int
13857 ix86_memory_move_cost (mode, class, in)
13858 enum machine_mode mode;
13859 enum reg_class class;
13860 int in;
13861 {
13862 if (FLOAT_CLASS_P (class))
13863 {
13864 int index;
13865 switch (mode)
13866 {
13867 case SFmode:
13868 index = 0;
13869 break;
13870 case DFmode:
13871 index = 1;
13872 break;
13873 case XFmode:
13874 case TFmode:
13875 index = 2;
13876 break;
13877 default:
13878 return 100;
13879 }
13880 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13881 }
13882 if (SSE_CLASS_P (class))
13883 {
13884 int index;
13885 switch (GET_MODE_SIZE (mode))
13886 {
13887 case 4:
13888 index = 0;
13889 break;
13890 case 8:
13891 index = 1;
13892 break;
13893 case 16:
13894 index = 2;
13895 break;
13896 default:
13897 return 100;
13898 }
13899 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13900 }
13901 if (MMX_CLASS_P (class))
13902 {
13903 int index;
13904 switch (GET_MODE_SIZE (mode))
13905 {
13906 case 4:
13907 index = 0;
13908 break;
13909 case 8:
13910 index = 1;
13911 break;
13912 default:
13913 return 100;
13914 }
13915 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13916 }
13917 switch (GET_MODE_SIZE (mode))
13918 {
13919 case 1:
13920 if (in)
13921 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13922 : ix86_cost->movzbl_load);
13923 else
13924 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13925 : ix86_cost->int_store[0] + 4);
13926 break;
13927 case 2:
13928 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13929 default:
13930 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13931 if (mode == TFmode)
13932 mode = XFmode;
13933 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13934 * ((int) GET_MODE_SIZE (mode)
13935 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
13936 }
13937 }
13938
13939 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13940 static void
13941 ix86_svr3_asm_out_constructor (symbol, priority)
13942 rtx symbol;
13943 int priority ATTRIBUTE_UNUSED;
13944 {
13945 init_section ();
13946 fputs ("\tpushl $", asm_out_file);
13947 assemble_name (asm_out_file, XSTR (symbol, 0));
13948 fputc ('\n', asm_out_file);
13949 }
13950 #endif
13951
13952 #if TARGET_MACHO
13953
13954 static int current_machopic_label_num;
13955
13956 /* Given a symbol name and its associated stub, write out the
13957 definition of the stub. */
13958
13959 void
13960 machopic_output_stub (file, symb, stub)
13961 FILE *file;
13962 const char *symb, *stub;
13963 {
13964 unsigned int length;
13965 char *binder_name, *symbol_name, lazy_ptr_name[32];
13966 int label = ++current_machopic_label_num;
13967
13968 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13969 symb = (*targetm.strip_name_encoding) (symb);
13970
13971 length = strlen (stub);
13972 binder_name = alloca (length + 32);
13973 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13974
13975 length = strlen (symb);
13976 symbol_name = alloca (length + 32);
13977 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13978
13979 sprintf (lazy_ptr_name, "L%d$lz", label);
13980
13981 if (MACHOPIC_PURE)
13982 machopic_picsymbol_stub_section ();
13983 else
13984 machopic_symbol_stub_section ();
13985
13986 fprintf (file, "%s:\n", stub);
13987 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13988
13989 if (MACHOPIC_PURE)
13990 {
13991 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13992 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13993 fprintf (file, "\tjmp %%edx\n");
13994 }
13995 else
13996 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13997
13998 fprintf (file, "%s:\n", binder_name);
13999
14000 if (MACHOPIC_PURE)
14001 {
14002 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14003 fprintf (file, "\tpushl %%eax\n");
14004 }
14005 else
14006 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14007
14008 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14009
14010 machopic_lazy_symbol_ptr_section ();
14011 fprintf (file, "%s:\n", lazy_ptr_name);
14012 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14013 fprintf (file, "\t.long %s\n", binder_name);
14014 }
14015 #endif /* TARGET_MACHO */
14016
14017 /* Order the registers for the register allocator. */
14018
14019 void
14020 x86_order_regs_for_local_alloc ()
14021 {
14022 int pos = 0;
14023 int i;
14024
14025 /* First allocate the local general purpose registers. */
14026 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14027 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14028 reg_alloc_order [pos++] = i;
14029
14030 /* Global general purpose registers. */
14031 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14032 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14033 reg_alloc_order [pos++] = i;
14034
14035 /* x87 registers come first in case we are doing FP math
14036 using them. */
14037 if (!TARGET_SSE_MATH)
14038 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14039 reg_alloc_order [pos++] = i;
14040
14041 /* SSE registers. */
14042 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14043 reg_alloc_order [pos++] = i;
14044 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14045 reg_alloc_order [pos++] = i;
14046
14047 /* x87 registers. */
14048 if (TARGET_SSE_MATH)
14049 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14050 reg_alloc_order [pos++] = i;
14051
14052 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14053 reg_alloc_order [pos++] = i;
14054
14055 /* Initialize the rest of the array, as some registers are not
14056 allocated at all. */
14057 while (pos < FIRST_PSEUDO_REGISTER)
14058 reg_alloc_order [pos++] = 0;
14059 }
14060
14061 /* Returns an expression indicating where the this parameter is
14062 located on entry to the FUNCTION. */
14063
14064 static rtx
14065 x86_this_parameter (function)
14066 tree function;
14067 {
14068 tree type = TREE_TYPE (function);
14069
14070 if (TARGET_64BIT)
14071 {
14072 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14073 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14074 }
14075
14076 if (ix86_fntype_regparm (type) > 0)
14077 {
14078 tree parm;
14079
14080 parm = TYPE_ARG_TYPES (type);
14081 /* Figure out whether or not the function has a variable number of
14082 arguments. */
14083 for (; parm; parm = TREE_CHAIN (parm))
14084 if (TREE_VALUE (parm) == void_type_node)
14085 break;
14086 /* If it does not, the this parameter is passed in %eax. */
14087 if (parm)
14088 return gen_rtx_REG (SImode, 0);
14089 }
14090
14091 if (aggregate_value_p (TREE_TYPE (type)))
14092 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14093 else
14094 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14095 }
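/* A rough summary of the cases above (assuming the usual calling
   conventions): on 64-bit targets the this pointer arrives in %rdi, or in
   %rsi when the function returns an aggregate in memory (the hidden return
   pointer takes %rdi); on ia32 with regparm and a fixed argument list it
   arrives in %eax; otherwise it lives on the stack at 4(%esp), or at
   8(%esp) when a hidden aggregate-return pointer is pushed first.  */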
14096
14097 /* Determine whether x86_output_mi_thunk can succeed. */
14098
14099 static bool
14100 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14101 tree thunk ATTRIBUTE_UNUSED;
14102 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14103 HOST_WIDE_INT vcall_offset;
14104 tree function;
14105 {
14106 /* 64-bit can handle anything. */
14107 if (TARGET_64BIT)
14108 return true;
14109
14110 /* For 32-bit, everything's fine if we have one free register. */
14111 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14112 return true;
14113
14114 /* Need a free register for vcall_offset. */
14115 if (vcall_offset)
14116 return false;
14117
14118 /* Need a free register for GOT references. */
14119 if (flag_pic && !(*targetm.binds_local_p) (function))
14120 return false;
14121
14122 /* Otherwise ok. */
14123 return true;
14124 }
14125
14126 /* Output the assembler code for a thunk function.  THUNK is the
14127 declaration for the thunk function itself, FUNCTION is the decl for
14128 the target function.  DELTA is an immediate constant offset to be
14129 added to THIS.  If VCALL_OFFSET is non-zero, the word at
14130 *(*THIS + VCALL_OFFSET) should be added to THIS.  */
14131
14132 static void
14133 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14134 FILE *file ATTRIBUTE_UNUSED;
14135 tree thunk ATTRIBUTE_UNUSED;
14136 HOST_WIDE_INT delta;
14137 HOST_WIDE_INT vcall_offset;
14138 tree function;
14139 {
14140 rtx xops[3];
14141 rtx this = x86_this_parameter (function);
14142 rtx this_reg, tmp;
14143
14144 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14145 pull it in now and let DELTA benefit. */
14146 if (REG_P (this))
14147 this_reg = this;
14148 else if (vcall_offset)
14149 {
14150 /* Put the this parameter into %eax. */
14151 xops[0] = this;
14152 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14153 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14154 }
14155 else
14156 this_reg = NULL_RTX;
14157
14158 /* Adjust the this parameter by a fixed constant. */
14159 if (delta)
14160 {
14161 xops[0] = GEN_INT (delta);
14162 xops[1] = this_reg ? this_reg : this;
14163 if (TARGET_64BIT)
14164 {
14165 if (!x86_64_general_operand (xops[0], DImode))
14166 {
14167 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14168 xops[1] = tmp;
14169 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14170 xops[0] = tmp;
14171 xops[1] = this;
14172 }
14173 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14174 }
14175 else
14176 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14177 }
14178
14179 /* Adjust the this parameter by a value stored in the vtable. */
14180 if (vcall_offset)
14181 {
14182 if (TARGET_64BIT)
14183 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14184 else
14185 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14186
14187 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14188 xops[1] = tmp;
14189 if (TARGET_64BIT)
14190 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14191 else
14192 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14193
14194 /* Adjust the this parameter. */
14195 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14196 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14197 {
14198 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14199 xops[0] = GEN_INT (vcall_offset);
14200 xops[1] = tmp2;
14201 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14202 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14203 }
14204 xops[1] = this_reg;
14205 if (TARGET_64BIT)
14206 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14207 else
14208 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14209 }
14210
14211 /* If necessary, drop THIS back to its stack slot. */
14212 if (this_reg && this_reg != this)
14213 {
14214 xops[0] = this_reg;
14215 xops[1] = this;
14216 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14217 }
14218
14219 xops[0] = DECL_RTL (function);
14220 if (TARGET_64BIT)
14221 {
14222 if (!flag_pic || (*targetm.binds_local_p) (function))
14223 output_asm_insn ("jmp\t%P0", xops);
14224 else
14225 {
14226 tmp = XEXP (xops[0], 0);
14227 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14228 tmp = gen_rtx_CONST (Pmode, tmp);
14229 tmp = gen_rtx_MEM (QImode, tmp);
14230 xops[0] = tmp;
14231 output_asm_insn ("jmp\t%A0", xops);
14232 }
14233 }
14234 else
14235 {
14236 if (!flag_pic || (*targetm.binds_local_p) (function))
14237 output_asm_insn ("jmp\t%P0", xops);
14238 else
14239 {
14240 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14241 output_set_got (tmp);
14242
14243 xops[1] = tmp;
14244 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14245 output_asm_insn ("jmp\t{*}%1", xops);
14246 }
14247 }
14248 }
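/* Illustrative only: a 32-bit, non-PIC thunk with DELTA == 4 and no
   VCALL_OFFSET comes out roughly as

	addl	$4, 4(%esp)
	jmp	target

   With a VCALL_OFFSET, THIS is first pulled into %eax, the vtable pointer
   is loaded into %ecx, the word at the given vtable offset is added to
   %eax, and the adjusted value is stored back to the incoming stack slot
   before the tail jump.  */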
14249
14250 int
14251 x86_field_alignment (field, computed)
14252 tree field;
14253 int computed;
14254 {
14255 enum machine_mode mode;
14256 tree type = TREE_TYPE (field);
14257
14258 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14259 return computed;
14260 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14261 ? get_inner_array_type (type) : type);
14262 if (mode == DFmode || mode == DCmode
14263 || GET_MODE_CLASS (mode) == MODE_INT
14264 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14265 return MIN (32, computed);
14266 return computed;
14267 }
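/* For example (illustrative): without -malign-double a DOUBLE or LONG LONG
   field is given at most 32-bit alignment on ia32, so in

     struct s { char c; double d; };

   the member D ends up at offset 4, matching the traditional System V
   i386 struct layout; on 64-bit targets the natural alignment is kept.  */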
14268
14269 /* Output assembler code to FILE to increment profiler label # LABELNO
14270 for profiling a function entry. */
14271 void
14272 x86_function_profiler (file, labelno)
14273 FILE *file;
14274 int labelno;
14275 {
14276 if (TARGET_64BIT)
14277 if (flag_pic)
14278 {
14279 #ifndef NO_PROFILE_COUNTERS
14280 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14281 #endif
14282 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14283 }
14284 else
14285 {
14286 #ifndef NO_PROFILE_COUNTERS
14287 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14288 #endif
14289 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14290 }
14291 else if (flag_pic)
14292 {
14293 #ifndef NO_PROFILE_COUNTERS
14294 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14295 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14296 #endif
14297 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14298 }
14299 else
14300 {
14301 #ifndef NO_PROFILE_COUNTERS
14302 fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno,
14303 PROFILE_COUNT_REGISTER);
14304 #endif
14305 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14306 }
14307 }
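/* Illustrative only: for a plain (non-PIC) ia32 compilation the code above
   emits roughly

	movl	$LP0, %edx
	call	mcount

   where the exact label prefix, counter register and mcount symbol come
   from LPREFIX, PROFILE_COUNT_REGISTER and MCOUNT_NAME respectively.  */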
14308
14309 /* Implement machine specific optimizations.
14310 At the moment we implement a single transformation: the AMD Athlon
14311 works faster when a RET is neither the destination of a conditional
14312 jump nor directly preceded by another jump instruction.  We avoid the
14313 penalty by inserting a NOP just before such RET instructions. */
14314 void
14315 x86_machine_dependent_reorg (first)
14316 rtx first ATTRIBUTE_UNUSED;
14317 {
14318 edge e;
14319
14320 if (!TARGET_ATHLON || !optimize || optimize_size)
14321 return;
14322 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14323 {
14324 basic_block bb = e->src;
14325 rtx ret = bb->end;
14326 rtx prev;
14327 bool insert = false;
14328
14329 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14330 continue;
14331 prev = prev_nonnote_insn (ret);
14332 if (prev && GET_CODE (prev) == CODE_LABEL)
14333 {
14334 edge e;
14335 for (e = bb->pred; e; e = e->pred_next)
14336 if (EDGE_FREQUENCY (e) && e->src->index > 0
14337 && !(e->flags & EDGE_FALLTHRU))
14338 insert = 1;
14339 }
14340 if (!insert)
14341 {
14342 prev = prev_real_insn (ret);
14343 if (prev && GET_CODE (prev) == JUMP_INSN
14344 && any_condjump_p (prev))
14345 insert = 1;
14346 }
14347 if (insert)
14348 emit_insn_before (gen_nop (), ret);
14349 }
14350 }
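/* Sketch of the transformation above: a frequently executed return that is
   the target of a conditional jump, e.g.

	jne	.L2
	...
     .L2:
	ret

   has a NOP inserted in front of the RET,

     .L2:
	nop
	ret

   so the RET is no longer the direct target of (or immediately preceded
   by) a jump, avoiding the Athlon branch-prediction penalty described
   above.  */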
14351
14352 #include "gt-i386.h"