1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
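/* Illustrative use (a sketch; mult_init and divide are the assumed names of
   the five-entry cost arrays declared in i386.h): MODE_INDEX (SImode) is 2,
   so ix86_cost->mult_init[MODE_INDEX (SImode)] selects the SImode entry of
   the "cost of starting a multiply" tables below, and likewise for the
   divide/mod tables. */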
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were put in P4 based on simulation results. But
531 after P4 was made, no performance benefit was observed with
532 branch hints. They also increase the code size. As a result,
533 icc never generates branch hints. */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where the type and dependencies are resolved on SSE register
566 parts instead of whole registers, so we may maintain just lower part of
567 scalar values in proper format leaving the upper part undefined. */
568 const int x86_sse_partial_regs = m_ATHLON_K8;
569 /* Athlon optimizes partial-register FPS special case, thus avoiding the
570 need for extra instructions beforehand */
571 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
576 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
577 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
578 /* Some CPU cores are not able to predict more than 4 branch instructions in
579 the 16 byte window. */
580 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
581 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K8 | m_PENT;
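/* Each of the bitmasks above is tested against the bit of the active CPU,
   e.g. (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) for architecture
   features or (x86_accumulate_outgoing_args & TUNEMASK) for tuning
   decisions, as done in override_options below. */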
582
583 /* In case the average insn count for a single function invocation is
584 lower than this constant, emit fast (but longer) prologue and
585 epilogue code. */
586 #define FAST_PROLOGUE_INSN_COUNT 20
587
588 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
589 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
590 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
591 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
592
593 /* Array of the smallest class containing reg number REGNO, indexed by
594 REGNO. Used by REGNO_REG_CLASS in i386.h. */
595
596 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
597 {
598 /* ax, dx, cx, bx */
599 AREG, DREG, CREG, BREG,
600 /* si, di, bp, sp */
601 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
602 /* FP registers */
603 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
604 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
605 /* arg pointer */
606 NON_Q_REGS,
607 /* flags, fpsr, dirflag, frame */
608 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
609 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
610 SSE_REGS, SSE_REGS,
611 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
612 MMX_REGS, MMX_REGS,
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
615 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
616 SSE_REGS, SSE_REGS,
617 };
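/* Example (assuming the usual REGNO_REG_CLASS definition in i386.h, which
   simply indexes this array): REGNO_REG_CLASS (1) yields DREG, the class
   holding only the d register, per the ax, dx, cx, bx order above. */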
618
619 /* The "default" register map used in 32bit mode. */
620
621 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
622 {
623 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
624 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
625 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
626 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
627 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
629 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
630 };
631
632 static int const x86_64_int_parameter_registers[6] =
633 {
634 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
635 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
636 };
637
638 static int const x86_64_int_return_registers[4] =
639 {
640 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
641 };
642
643 /* The "default" register map used in 64bit mode. */
644 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
645 {
646 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
647 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
648 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
649 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
650 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
651 8,9,10,11,12,13,14,15, /* extended integer registers */
652 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
653 };
654
655 /* Define the register numbers to be used in Dwarf debugging information.
656 The SVR4 reference port C compiler uses the following register numbers
657 in its Dwarf output code:
658 0 for %eax (gcc regno = 0)
659 1 for %ecx (gcc regno = 2)
660 2 for %edx (gcc regno = 1)
661 3 for %ebx (gcc regno = 3)
662 4 for %esp (gcc regno = 7)
663 5 for %ebp (gcc regno = 6)
664 6 for %esi (gcc regno = 4)
665 7 for %edi (gcc regno = 5)
666 The following three DWARF register numbers are never generated by
667 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
668 believes these numbers have these meanings.
669 8 for %eip (no gcc equivalent)
670 9 for %eflags (gcc regno = 17)
671 10 for %trapno (no gcc equivalent)
672 It is not at all clear how we should number the FP stack registers
673 for the x86 architecture. If the version of SDB on x86/svr4 were
674 a bit less brain dead with respect to floating-point then we would
675 have a precedent to follow with respect to DWARF register numbers
676 for x86 FP registers, but the SDB on x86/svr4 is so completely
677 broken with respect to FP registers that it is hardly worth thinking
678 of it as something to strive for compatibility with.
679 The version of x86/svr4 SDB I have at the moment does (partially)
680 seem to believe that DWARF register number 11 is associated with
681 the x86 register %st(0), but that's about all. Higher DWARF
682 register numbers don't seem to be associated with anything in
683 particular, and even for DWARF regno 11, SDB only seems to under-
684 stand that it should say that a variable lives in %st(0) (when
685 asked via an `=' command) if we said it was in DWARF regno 11,
686 but SDB still prints garbage when asked for the value of the
687 variable in question (via a `/' command).
688 (Also note that the labels SDB prints for various FP stack regs
689 when doing an `x' command are all wrong.)
690 Note that these problems generally don't affect the native SVR4
691 C compiler because it doesn't allow the use of -O with -g and
692 because when it is *not* optimizing, it allocates a memory
693 location for each floating-point variable, and the memory
694 location is what gets described in the DWARF AT_location
695 attribute for the variable in question.
696 Regardless of the severe mental illness of the x86/svr4 SDB, we
697 do something sensible here and we use the following DWARF
698 register numbers. Note that these are all stack-top-relative
699 numbers.
700 11 for %st(0) (gcc regno = 8)
701 12 for %st(1) (gcc regno = 9)
702 13 for %st(2) (gcc regno = 10)
703 14 for %st(3) (gcc regno = 11)
704 15 for %st(4) (gcc regno = 12)
705 16 for %st(5) (gcc regno = 13)
706 17 for %st(6) (gcc regno = 14)
707 18 for %st(7) (gcc regno = 15)
708 */
709 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
710 {
711 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
712 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
713 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
714 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
715 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
717 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
718 };
719
720 /* Test and compare insns in i386.md store the information needed to
721 generate branch and scc insns here. */
722
723 rtx ix86_compare_op0 = NULL_RTX;
724 rtx ix86_compare_op1 = NULL_RTX;
725
726 #define MAX_386_STACK_LOCALS 3
727 /* Size of the register save area. */
728 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
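/* With the usual x86-64 values (assumed here, see i386.h): REGPARM_MAX == 6
   integer registers of UNITS_PER_WORD == 8 bytes each plus SSE_REGPARM_MAX == 8
   SSE registers of 16 bytes each, i.e. 6*8 + 8*16 = 176 bytes for the
   va_arg register save area. */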
729
730 /* Define the structure for the machine field in struct function. */
731
732 struct stack_local_entry GTY(())
733 {
734 unsigned short mode;
735 unsigned short n;
736 rtx rtl;
737 struct stack_local_entry *next;
738 };
739
740 /* Structure describing stack frame layout.
741 Stack grows downward:
742
743 [arguments]
744 <- ARG_POINTER
745 saved pc
746
747 saved frame pointer if frame_pointer_needed
748 <- HARD_FRAME_POINTER
749 [saved regs]
750
751 [padding1] \
752 )
753 [va_arg registers] (
754 > to_allocate <- FRAME_POINTER
755 [frame] (
756 )
757 [padding2] /
758 */
759 struct ix86_frame
760 {
761 int nregs;
762 int padding1;
763 int va_arg_size;
764 HOST_WIDE_INT frame;
765 int padding2;
766 int outgoing_arguments_size;
767 int red_zone_size;
768
769 HOST_WIDE_INT to_allocate;
770 /* The offsets relative to ARG_POINTER. */
771 HOST_WIDE_INT frame_pointer_offset;
772 HOST_WIDE_INT hard_frame_pointer_offset;
773 HOST_WIDE_INT stack_pointer_offset;
774
775 /* When save_regs_using_mov is set, emit prologue using
776 move instead of push instructions. */
777 bool save_regs_using_mov;
778 };
779
780 /* Used to enable/disable debugging features. */
781 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
782 /* Code model option as passed by user. */
783 const char *ix86_cmodel_string;
784 /* Parsed value. */
785 enum cmodel ix86_cmodel;
786 /* Asm dialect. */
787 const char *ix86_asm_string;
788 enum asm_dialect ix86_asm_dialect = ASM_ATT;
789 /* TLS dialect. */
790 const char *ix86_tls_dialect_string;
791 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
792
793 /* Which unit we are generating floating point math for. */
794 enum fpmath_unit ix86_fpmath;
795
796 /* Which cpu we are scheduling for. */
797 enum processor_type ix86_tune;
798 /* Which instruction set architecture to use. */
799 enum processor_type ix86_arch;
800
801 /* Strings to hold which cpu and instruction set architecture to use. */
802 const char *ix86_tune_string; /* for -mtune=<xxx> */
803 const char *ix86_arch_string; /* for -march=<xxx> */
804 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
805
806 /* # of registers to use to pass arguments. */
807 const char *ix86_regparm_string;
808
809 /* True if the SSE prefetch instruction is not a NOOP. */
810 int x86_prefetch_sse;
811
812 /* ix86_regparm_string as a number */
813 int ix86_regparm;
814
815 /* Alignment to use for loops and jumps: */
816
817 /* Power of two alignment for loops. */
818 const char *ix86_align_loops_string;
819
820 /* Power of two alignment for non-loop jumps. */
821 const char *ix86_align_jumps_string;
822
823 /* Power of two alignment for stack boundary in bytes. */
824 const char *ix86_preferred_stack_boundary_string;
825
826 /* Preferred alignment for stack boundary in bits. */
827 unsigned int ix86_preferred_stack_boundary;
828
829 /* Values 1-5: see jump.c */
830 int ix86_branch_cost;
831 const char *ix86_branch_cost_string;
832
833 /* Power of two alignment for functions. */
834 const char *ix86_align_funcs_string;
835
836 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
837 char internal_label_prefix[16];
838 int internal_label_prefix_len;
839 \f
840 static void output_pic_addr_const (FILE *, rtx, int);
841 static void put_condition_code (enum rtx_code, enum machine_mode,
842 int, int, FILE *);
843 static const char *get_some_local_dynamic_name (void);
844 static int get_some_local_dynamic_name_1 (rtx *, void *);
845 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
846 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
847 rtx *);
848 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
849 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
850 enum machine_mode);
851 static rtx get_thread_pointer (int);
852 static rtx legitimize_tls_address (rtx, enum tls_model, int);
853 static void get_pc_thunk_name (char [32], unsigned int);
854 static rtx gen_push (rtx);
855 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
856 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
857 static struct machine_function * ix86_init_machine_status (void);
858 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
859 static int ix86_nsaved_regs (void);
860 static void ix86_emit_save_regs (void);
861 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
862 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
863 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
864 static HOST_WIDE_INT ix86_GOT_alias_set (void);
865 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
866 static rtx ix86_expand_aligntest (rtx, int);
867 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
868 static int ix86_issue_rate (void);
869 static int ix86_adjust_cost (rtx, rtx, rtx, int);
870 static int ia32_multipass_dfa_lookahead (void);
871 static bool ix86_misaligned_mem_ok (enum machine_mode);
872 static void ix86_init_mmx_sse_builtins (void);
873 static rtx x86_this_parameter (tree);
874 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
875 HOST_WIDE_INT, tree);
876 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
877 static void x86_file_start (void);
878 static void ix86_reorg (void);
879 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
880 static tree ix86_build_builtin_va_list (void);
881 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
882 tree, int *, int);
883 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
884 static bool ix86_vector_mode_supported_p (enum machine_mode);
885
886 static int ix86_address_cost (rtx);
887 static bool ix86_cannot_force_const_mem (rtx);
888 static rtx ix86_delegitimize_address (rtx);
889
890 struct builtin_description;
891 static rtx ix86_expand_sse_comi (const struct builtin_description *,
892 tree, rtx);
893 static rtx ix86_expand_sse_compare (const struct builtin_description *,
894 tree, rtx);
895 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
896 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
897 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
898 static rtx ix86_expand_store_builtin (enum insn_code, tree);
899 static rtx safe_vector_operand (rtx, enum machine_mode);
900 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
901 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
902 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
903 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
904 static int ix86_fp_comparison_cost (enum rtx_code code);
905 static unsigned int ix86_select_alt_pic_regnum (void);
906 static int ix86_save_reg (unsigned int, int);
907 static void ix86_compute_frame_layout (struct ix86_frame *);
908 static int ix86_comp_type_attributes (tree, tree);
909 static int ix86_function_regparm (tree, tree);
910 const struct attribute_spec ix86_attribute_table[];
911 static bool ix86_function_ok_for_sibcall (tree, tree);
912 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
913 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
914 static int ix86_value_regno (enum machine_mode);
915 static bool contains_128bit_aligned_vector_p (tree);
916 static rtx ix86_struct_value_rtx (tree, int);
917 static bool ix86_ms_bitfield_layout_p (tree);
918 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
919 static int extended_reg_mentioned_1 (rtx *, void *);
920 static bool ix86_rtx_costs (rtx, int, int, int *);
921 static int min_insn_size (rtx);
922 static tree ix86_md_asm_clobbers (tree clobbers);
923 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
924 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
925 tree, bool);
926
927 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
928 static void ix86_svr3_asm_out_constructor (rtx, int);
929 #endif
930
931 /* Register class used for passing a given 64-bit part of the argument.
932 These represent classes as documented by the PS ABI, with the exception
933 of the SSESF and SSEDF classes, which are basically the SSE class, except
934 that gcc will use SFmode or DFmode moves instead of DImode moves to avoid
935 reformatting penalties.
936
937 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
938 whenever possible (the upper half does contain padding). */
939 enum x86_64_reg_class
940 {
941 X86_64_NO_CLASS,
942 X86_64_INTEGER_CLASS,
943 X86_64_INTEGERSI_CLASS,
944 X86_64_SSE_CLASS,
945 X86_64_SSESF_CLASS,
946 X86_64_SSEDF_CLASS,
947 X86_64_SSEUP_CLASS,
948 X86_64_X87_CLASS,
949 X86_64_X87UP_CLASS,
950 X86_64_MEMORY_CLASS
951 };
952 static const char * const x86_64_reg_class_name[] =
953 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
954
955 #define MAX_CLASSES 4
956 static int classify_argument (enum machine_mode, tree,
957 enum x86_64_reg_class [MAX_CLASSES], int);
958 static int examine_argument (enum machine_mode, tree, int, int *, int *);
959 static rtx construct_container (enum machine_mode, tree, int, int, int,
960 const int *, int);
961 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
962 enum x86_64_reg_class);
963
964 /* Table of constants used by fldpi, fldln2, etc.... */
965 static REAL_VALUE_TYPE ext_80387_constants_table [5];
966 static bool ext_80387_constants_init = 0;
967 static void init_ext_80387_constants (void);
968 \f
969 /* Initialize the GCC target structure. */
970 #undef TARGET_ATTRIBUTE_TABLE
971 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
972 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
973 # undef TARGET_MERGE_DECL_ATTRIBUTES
974 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
975 #endif
976
977 #undef TARGET_COMP_TYPE_ATTRIBUTES
978 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
979
980 #undef TARGET_INIT_BUILTINS
981 #define TARGET_INIT_BUILTINS ix86_init_builtins
982
983 #undef TARGET_EXPAND_BUILTIN
984 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
985
986 #undef TARGET_ASM_FUNCTION_EPILOGUE
987 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
988
989 #undef TARGET_ASM_OPEN_PAREN
990 #define TARGET_ASM_OPEN_PAREN ""
991 #undef TARGET_ASM_CLOSE_PAREN
992 #define TARGET_ASM_CLOSE_PAREN ""
993
994 #undef TARGET_ASM_ALIGNED_HI_OP
995 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
996 #undef TARGET_ASM_ALIGNED_SI_OP
997 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
998 #ifdef ASM_QUAD
999 #undef TARGET_ASM_ALIGNED_DI_OP
1000 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1001 #endif
1002
1003 #undef TARGET_ASM_UNALIGNED_HI_OP
1004 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1005 #undef TARGET_ASM_UNALIGNED_SI_OP
1006 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1007 #undef TARGET_ASM_UNALIGNED_DI_OP
1008 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1009
1010 #undef TARGET_SCHED_ADJUST_COST
1011 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1012 #undef TARGET_SCHED_ISSUE_RATE
1013 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1014 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1015 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1016 ia32_multipass_dfa_lookahead
1017
1018 #undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
1019 #define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok
1020
1021 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1022 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1023
1024 #ifdef HAVE_AS_TLS
1025 #undef TARGET_HAVE_TLS
1026 #define TARGET_HAVE_TLS true
1027 #endif
1028 #undef TARGET_CANNOT_FORCE_CONST_MEM
1029 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1030
1031 #undef TARGET_DELEGITIMIZE_ADDRESS
1032 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1033
1034 #undef TARGET_MS_BITFIELD_LAYOUT_P
1035 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1036
1037 #undef TARGET_ASM_OUTPUT_MI_THUNK
1038 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1039 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1041
1042 #undef TARGET_ASM_FILE_START
1043 #define TARGET_ASM_FILE_START x86_file_start
1044
1045 #undef TARGET_RTX_COSTS
1046 #define TARGET_RTX_COSTS ix86_rtx_costs
1047 #undef TARGET_ADDRESS_COST
1048 #define TARGET_ADDRESS_COST ix86_address_cost
1049
1050 #undef TARGET_FIXED_CONDITION_CODE_REGS
1051 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1052 #undef TARGET_CC_MODES_COMPATIBLE
1053 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1054
1055 #undef TARGET_MACHINE_DEPENDENT_REORG
1056 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1057
1058 #undef TARGET_BUILD_BUILTIN_VA_LIST
1059 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1060
1061 #undef TARGET_MD_ASM_CLOBBERS
1062 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1063
1064 #undef TARGET_PROMOTE_PROTOTYPES
1065 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1066 #undef TARGET_STRUCT_VALUE_RTX
1067 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1068 #undef TARGET_SETUP_INCOMING_VARARGS
1069 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1070 #undef TARGET_MUST_PASS_IN_STACK
1071 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1072 #undef TARGET_PASS_BY_REFERENCE
1073 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1074
1075 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1076 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1077
1078 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1079 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1080
1081 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1082 #undef TARGET_INSERT_ATTRIBUTES
1083 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1084 #endif
1085
1086 struct gcc_target targetm = TARGET_INITIALIZER;
1087
1088 \f
1089 /* The svr4 ABI for the i386 says that records and unions are returned
1090 in memory. */
1091 #ifndef DEFAULT_PCC_STRUCT_RETURN
1092 #define DEFAULT_PCC_STRUCT_RETURN 1
1093 #endif
1094
1095 /* Sometimes certain combinations of command options do not make
1096 sense on a particular target machine. You can define a macro
1097 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1098 defined, is executed once just after all the command options have
1099 been parsed.
1100
1101 Don't use this macro to turn on various extra optimizations for
1102 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1103
1104 void
1105 override_options (void)
1106 {
1107 int i;
1108 int ix86_tune_defaulted = 0;
1109
1110 /* Comes from final.c -- no real reason to change it. */
1111 #define MAX_CODE_ALIGN 16
1112
1113 static struct ptt
1114 {
1115 const struct processor_costs *cost; /* Processor costs */
1116 const int target_enable; /* Target flags to enable. */
1117 const int target_disable; /* Target flags to disable. */
1118 const int align_loop; /* Default alignments. */
1119 const int align_loop_max_skip;
1120 const int align_jump;
1121 const int align_jump_max_skip;
1122 const int align_func;
1123 }
1124 const processor_target_table[PROCESSOR_max] =
1125 {
1126 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1127 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1128 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1129 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1130 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1131 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1132 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1133 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1134 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1135 };
1136
1137 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1138 static struct pta
1139 {
1140 const char *const name; /* processor name or nickname. */
1141 const enum processor_type processor;
1142 const enum pta_flags
1143 {
1144 PTA_SSE = 1,
1145 PTA_SSE2 = 2,
1146 PTA_SSE3 = 4,
1147 PTA_MMX = 8,
1148 PTA_PREFETCH_SSE = 16,
1149 PTA_3DNOW = 32,
1150 PTA_3DNOW_A = 64,
1151 PTA_64BIT = 128
1152 } flags;
1153 }
1154 const processor_alias_table[] =
1155 {
1156 {"i386", PROCESSOR_I386, 0},
1157 {"i486", PROCESSOR_I486, 0},
1158 {"i586", PROCESSOR_PENTIUM, 0},
1159 {"pentium", PROCESSOR_PENTIUM, 0},
1160 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1161 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1162 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1163 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1164 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1165 {"i686", PROCESSOR_PENTIUMPRO, 0},
1166 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1167 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1168 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1169 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1170 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1171 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1172 | PTA_MMX | PTA_PREFETCH_SSE},
1173 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1174 | PTA_MMX | PTA_PREFETCH_SSE},
1175 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1176 | PTA_MMX | PTA_PREFETCH_SSE},
1177 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1178 | PTA_MMX | PTA_PREFETCH_SSE},
1179 {"k6", PROCESSOR_K6, PTA_MMX},
1180 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1181 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1182 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1183 | PTA_3DNOW_A},
1184 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1185 | PTA_3DNOW | PTA_3DNOW_A},
1186 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1187 | PTA_3DNOW_A | PTA_SSE},
1188 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1189 | PTA_3DNOW_A | PTA_SSE},
1190 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1191 | PTA_3DNOW_A | PTA_SSE},
1192 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1193 | PTA_SSE | PTA_SSE2 },
1194 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1195 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1196 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1197 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1198 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1199 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1200 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1201 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1202 };
1203
1204 int const pta_size = ARRAY_SIZE (processor_alias_table);
1205
1206 /* Set the default values for switches whose default depends on TARGET_64BIT
1207 in case they weren't overwritten by command line options. */
1208 if (TARGET_64BIT)
1209 {
1210 if (flag_omit_frame_pointer == 2)
1211 flag_omit_frame_pointer = 1;
1212 if (flag_asynchronous_unwind_tables == 2)
1213 flag_asynchronous_unwind_tables = 1;
1214 if (flag_pcc_struct_return == 2)
1215 flag_pcc_struct_return = 0;
1216 }
1217 else
1218 {
1219 if (flag_omit_frame_pointer == 2)
1220 flag_omit_frame_pointer = 0;
1221 if (flag_asynchronous_unwind_tables == 2)
1222 flag_asynchronous_unwind_tables = 0;
1223 if (flag_pcc_struct_return == 2)
1224 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1225 }
1226
1227 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1228 SUBTARGET_OVERRIDE_OPTIONS;
1229 #endif
1230
1231 if (!ix86_tune_string && ix86_arch_string)
1232 ix86_tune_string = ix86_arch_string;
1233 if (!ix86_tune_string)
1234 {
1235 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1236 ix86_tune_defaulted = 1;
1237 }
1238 if (!ix86_arch_string)
1239 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1240
1241 if (ix86_cmodel_string != 0)
1242 {
1243 if (!strcmp (ix86_cmodel_string, "small"))
1244 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1245 else if (flag_pic)
1246 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1247 else if (!strcmp (ix86_cmodel_string, "32"))
1248 ix86_cmodel = CM_32;
1249 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1250 ix86_cmodel = CM_KERNEL;
1251 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1252 ix86_cmodel = CM_MEDIUM;
1253 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1254 ix86_cmodel = CM_LARGE;
1255 else
1256 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1257 }
1258 else
1259 {
1260 ix86_cmodel = CM_32;
1261 if (TARGET_64BIT)
1262 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1263 }
1264 if (ix86_asm_string != 0)
1265 {
1266 if (!strcmp (ix86_asm_string, "intel"))
1267 ix86_asm_dialect = ASM_INTEL;
1268 else if (!strcmp (ix86_asm_string, "att"))
1269 ix86_asm_dialect = ASM_ATT;
1270 else
1271 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1272 }
1273 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1274 error ("code model %qs not supported in the %s bit mode",
1275 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1276 if (ix86_cmodel == CM_LARGE)
1277 sorry ("code model %<large%> not supported yet");
1278 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1279 sorry ("%i-bit mode not compiled in",
1280 (target_flags & MASK_64BIT) ? 64 : 32);
1281
1282 for (i = 0; i < pta_size; i++)
1283 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1284 {
1285 ix86_arch = processor_alias_table[i].processor;
1286 /* Default cpu tuning to the architecture. */
1287 ix86_tune = ix86_arch;
1288 if (processor_alias_table[i].flags & PTA_MMX
1289 && !(target_flags_explicit & MASK_MMX))
1290 target_flags |= MASK_MMX;
1291 if (processor_alias_table[i].flags & PTA_3DNOW
1292 && !(target_flags_explicit & MASK_3DNOW))
1293 target_flags |= MASK_3DNOW;
1294 if (processor_alias_table[i].flags & PTA_3DNOW_A
1295 && !(target_flags_explicit & MASK_3DNOW_A))
1296 target_flags |= MASK_3DNOW_A;
1297 if (processor_alias_table[i].flags & PTA_SSE
1298 && !(target_flags_explicit & MASK_SSE))
1299 target_flags |= MASK_SSE;
1300 if (processor_alias_table[i].flags & PTA_SSE2
1301 && !(target_flags_explicit & MASK_SSE2))
1302 target_flags |= MASK_SSE2;
1303 if (processor_alias_table[i].flags & PTA_SSE3
1304 && !(target_flags_explicit & MASK_SSE3))
1305 target_flags |= MASK_SSE3;
1306 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1307 x86_prefetch_sse = true;
1308 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1309 {
1310 if (ix86_tune_defaulted)
1311 {
1312 ix86_tune_string = "x86-64";
1313 for (i = 0; i < pta_size; i++)
1314 if (! strcmp (ix86_tune_string,
1315 processor_alias_table[i].name))
1316 break;
1317 ix86_tune = processor_alias_table[i].processor;
1318 }
1319 else
1320 error ("CPU you selected does not support x86-64 "
1321 "instruction set");
1322 }
1323 break;
1324 }
1325
1326 if (i == pta_size)
1327 error ("bad value (%s) for -march= switch", ix86_arch_string);
1328
1329 for (i = 0; i < pta_size; i++)
1330 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1331 {
1332 ix86_tune = processor_alias_table[i].processor;
1333 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1334 error ("CPU you selected does not support x86-64 instruction set");
1335
1336 /* Intel CPUs have always interpreted SSE prefetch instructions as
1337 NOPs; so, we can enable SSE prefetch instructions even when
1338 -mtune (rather than -march) points us to a processor that has them.
1339 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1340 higher processors. */
1341 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1342 x86_prefetch_sse = true;
1343 break;
1344 }
1345 if (i == pta_size)
1346 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1347
1348 if (optimize_size)
1349 ix86_cost = &size_cost;
1350 else
1351 ix86_cost = processor_target_table[ix86_tune].cost;
1352 target_flags |= processor_target_table[ix86_tune].target_enable;
1353 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1354
1355 /* Arrange to set up i386_stack_locals for all functions. */
1356 init_machine_status = ix86_init_machine_status;
1357
1358 /* Validate -mregparm= value. */
1359 if (ix86_regparm_string)
1360 {
1361 i = atoi (ix86_regparm_string);
1362 if (i < 0 || i > REGPARM_MAX)
1363 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1364 else
1365 ix86_regparm = i;
1366 }
1367 else
1368 if (TARGET_64BIT)
1369 ix86_regparm = REGPARM_MAX;
1370
1371 /* If the user has provided any of the -malign-* options,
1372 warn and use that value only if -falign-* is not set.
1373 Remove this code in GCC 3.2 or later. */
1374 if (ix86_align_loops_string)
1375 {
1376 warning ("-malign-loops is obsolete, use -falign-loops");
1377 if (align_loops == 0)
1378 {
1379 i = atoi (ix86_align_loops_string);
1380 if (i < 0 || i > MAX_CODE_ALIGN)
1381 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1382 else
1383 align_loops = 1 << i;
1384 }
1385 }
1386
1387 if (ix86_align_jumps_string)
1388 {
1389 warning ("-malign-jumps is obsolete, use -falign-jumps");
1390 if (align_jumps == 0)
1391 {
1392 i = atoi (ix86_align_jumps_string);
1393 if (i < 0 || i > MAX_CODE_ALIGN)
1394 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1395 else
1396 align_jumps = 1 << i;
1397 }
1398 }
1399
1400 if (ix86_align_funcs_string)
1401 {
1402 warning ("-malign-functions is obsolete, use -falign-functions");
1403 if (align_functions == 0)
1404 {
1405 i = atoi (ix86_align_funcs_string);
1406 if (i < 0 || i > MAX_CODE_ALIGN)
1407 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1408 else
1409 align_functions = 1 << i;
1410 }
1411 }
1412
1413 /* Default align_* from the processor table. */
1414 if (align_loops == 0)
1415 {
1416 align_loops = processor_target_table[ix86_tune].align_loop;
1417 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1418 }
1419 if (align_jumps == 0)
1420 {
1421 align_jumps = processor_target_table[ix86_tune].align_jump;
1422 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1423 }
1424 if (align_functions == 0)
1425 {
1426 align_functions = processor_target_table[ix86_tune].align_func;
1427 }
1428
1429 /* Validate -mpreferred-stack-boundary= value, or provide default.
1430 The default of 128 bits is for Pentium III's SSE __m128, but we
1431 don't want additional code to keep the stack aligned when
1432 optimizing for code size. */
1433 ix86_preferred_stack_boundary = (optimize_size
1434 ? TARGET_64BIT ? 128 : 32
1435 : 128);
1436 if (ix86_preferred_stack_boundary_string)
1437 {
1438 i = atoi (ix86_preferred_stack_boundary_string);
1439 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1440 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1441 TARGET_64BIT ? 4 : 2);
1442 else
1443 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1444 }
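/* For example, -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
   = 128 bits (16 bytes), matching the default set above when not optimizing
   for size. */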
1445
1446 /* Validate -mbranch-cost= value, or provide default. */
1447 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1448 if (ix86_branch_cost_string)
1449 {
1450 i = atoi (ix86_branch_cost_string);
1451 if (i < 0 || i > 5)
1452 error ("-mbranch-cost=%d is not between 0 and 5", i);
1453 else
1454 ix86_branch_cost = i;
1455 }
1456
1457 if (ix86_tls_dialect_string)
1458 {
1459 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1460 ix86_tls_dialect = TLS_DIALECT_GNU;
1461 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1462 ix86_tls_dialect = TLS_DIALECT_SUN;
1463 else
1464 error ("bad value (%s) for -mtls-dialect= switch",
1465 ix86_tls_dialect_string);
1466 }
1467
1468 /* Keep nonleaf frame pointers. */
1469 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1470 flag_omit_frame_pointer = 1;
1471
1472 /* If we're doing fast math, we don't care about comparison order
1473 wrt NaNs. This lets us use a shorter comparison sequence. */
1474 if (flag_unsafe_math_optimizations)
1475 target_flags &= ~MASK_IEEE_FP;
1476
1477 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1478 since the insns won't need emulation. */
1479 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1480 target_flags &= ~MASK_NO_FANCY_MATH_387;
1481
1482 /* Turn on SSE2 builtins for -msse3. */
1483 if (TARGET_SSE3)
1484 target_flags |= MASK_SSE2;
1485
1486 /* Turn on SSE builtins for -msse2. */
1487 if (TARGET_SSE2)
1488 target_flags |= MASK_SSE;
1489
1490 if (TARGET_64BIT)
1491 {
1492 if (TARGET_ALIGN_DOUBLE)
1493 error ("-malign-double makes no sense in the 64bit mode");
1494 if (TARGET_RTD)
1495 error ("-mrtd calling convention not supported in the 64bit mode");
1496 /* Enable by default the SSE and MMX builtins. */
1497 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1498 ix86_fpmath = FPMATH_SSE;
1499 }
1500 else
1501 {
1502 ix86_fpmath = FPMATH_387;
1503 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1504 when the programmer takes care to keep the stack from being destroyed. */
1505 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1506 target_flags |= MASK_NO_RED_ZONE;
1507 }
1508
1509 if (ix86_fpmath_string != 0)
1510 {
1511 if (! strcmp (ix86_fpmath_string, "387"))
1512 ix86_fpmath = FPMATH_387;
1513 else if (! strcmp (ix86_fpmath_string, "sse"))
1514 {
1515 if (!TARGET_SSE)
1516 {
1517 warning ("SSE instruction set disabled, using 387 arithmetic");
1518 ix86_fpmath = FPMATH_387;
1519 }
1520 else
1521 ix86_fpmath = FPMATH_SSE;
1522 }
1523 else if (! strcmp (ix86_fpmath_string, "387,sse")
1524 || ! strcmp (ix86_fpmath_string, "sse,387"))
1525 {
1526 if (!TARGET_SSE)
1527 {
1528 warning ("SSE instruction set disabled, using 387 arithmetic");
1529 ix86_fpmath = FPMATH_387;
1530 }
1531 else if (!TARGET_80387)
1532 {
1533 warning ("387 instruction set disabled, using SSE arithmetic");
1534 ix86_fpmath = FPMATH_SSE;
1535 }
1536 else
1537 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1538 }
1539 else
1540 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1541 }
1542
1543 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1544 on by -msse. */
1545 if (TARGET_SSE)
1546 {
1547 target_flags |= MASK_MMX;
1548 x86_prefetch_sse = true;
1549 }
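  /* For illustration: the implications above cascade, so a bare -msse3 ends up
     enabling MASK_SSE2, MASK_SSE and MASK_MMX as well.  */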
1550
1551 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1552 if (TARGET_3DNOW)
1553 {
1554 target_flags |= MASK_MMX;
1555 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1556 extensions it adds. */
1557 if (x86_3dnow_a & (1 << ix86_arch))
1558 target_flags |= MASK_3DNOW_A;
1559 }
1560 if ((x86_accumulate_outgoing_args & TUNEMASK)
1561 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1562 && !optimize_size)
1563 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1564
1565 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1566 {
1567 char *p;
1568 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1569 p = strchr (internal_label_prefix, 'X');
1570 internal_label_prefix_len = p - internal_label_prefix;
1571 *p = '\0';
1572 }
1573 /* When the scheduling description is not available, disable the scheduler pass
1574 so that it won't slow down compilation and make x87 code slower. */
1575 if (!TARGET_SCHEDULE)
1576 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1577 }
1578 \f
1579 void
1580 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1581 {
1582 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1583 make the problem with not enough registers even worse. */
1584 #ifdef INSN_SCHEDULING
1585 if (level > 1)
1586 flag_schedule_insns = 0;
1587 #endif
1588
1589 /* The default values of these switches depend on TARGET_64BIT,
1590 which is not known at this moment. Mark these values with 2 and
1591 let the user override them. If there is no command line option
1592 specifying them, we will set the defaults in override_options. */
1593 if (optimize >= 1)
1594 flag_omit_frame_pointer = 2;
1595 flag_pcc_struct_return = 2;
1596 flag_asynchronous_unwind_tables = 2;
1597 }
1598 \f
1599 /* Table of valid machine attributes. */
1600 const struct attribute_spec ix86_attribute_table[] =
1601 {
1602 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1603 /* Stdcall attribute says callee is responsible for popping arguments
1604 if they are not variable. */
1605 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1606 /* Fastcall attribute says callee is responsible for popping arguments
1607 if they are not variable. */
1608 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1609 /* Cdecl attribute says the callee is a normal C declaration. */
1610 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1611 /* Regparm attribute specifies how many integer arguments are to be
1612 passed in registers. */
1613 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1614 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1615 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1616 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1617 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1618 #endif
1619 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1620 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1621 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1622 SUBTARGET_ATTRIBUTE_TABLE,
1623 #endif
1624 { NULL, 0, 0, false, false, false, NULL }
1625 };
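/* For illustration only; the declarations below are hypothetical user code,
   not part of GCC.  With the attributes in the table above one might write

     int __attribute__ ((regparm (3))) f (int a, int b, int c);
     int __attribute__ ((fastcall)) g (int a, int b);

   For f the first three integer arguments are passed in registers rather than
   on the stack; for g the first two DWORD-sized (or smaller) arguments go in
   ECX and EDX and the callee pops its stack arguments, as handled by
   ix86_function_regparm, init_cumulative_args and ix86_return_pops_args
   below.  */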
1626
1627 /* Decide whether we can make a sibling call to a function. DECL is the
1628 declaration of the function being targeted by the call and EXP is the
1629 CALL_EXPR representing the call. */
1630
1631 static bool
1632 ix86_function_ok_for_sibcall (tree decl, tree exp)
1633 {
1634 /* If we are generating position-independent code, we cannot sibcall
1635 optimize any indirect call, or a direct call to a global function,
1636 as the PLT requires %ebx be live. */
1637 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1638 return false;
1639
1640 /* If we are returning floats on the 80387 register stack, we cannot
1641 make a sibcall from a function that doesn't return a float to a
1642 function that does or, conversely, from a function that does return
1643 a float to a function that doesn't; the necessary stack adjustment
1644 would not be executed. */
1645 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1646 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1647 return false;
1648
1649 /* If this call is indirect, we'll need to be able to use a call-clobbered
1650 register for the address of the target function. Make sure that all
1651 such registers are not used for passing parameters. */
1652 if (!decl && !TARGET_64BIT)
1653 {
1654 tree type;
1655
1656 /* We're looking at the CALL_EXPR, we need the type of the function. */
1657 type = TREE_OPERAND (exp, 0); /* pointer expression */
1658 type = TREE_TYPE (type); /* pointer type */
1659 type = TREE_TYPE (type); /* function type */
1660
1661 if (ix86_function_regparm (type, NULL) >= 3)
1662 {
1663 /* ??? Need to count the actual number of registers to be used,
1664 not the possible number of registers. Fix later. */
1665 return false;
1666 }
1667 }
1668
1669 /* Otherwise okay. That also includes certain types of indirect calls. */
1670 return true;
1671 }
1672
1673 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1674 arguments as in struct attribute_spec.handler. */
1675 static tree
1676 ix86_handle_cdecl_attribute (tree *node, tree name,
1677 tree args ATTRIBUTE_UNUSED,
1678 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1679 {
1680 if (TREE_CODE (*node) != FUNCTION_TYPE
1681 && TREE_CODE (*node) != METHOD_TYPE
1682 && TREE_CODE (*node) != FIELD_DECL
1683 && TREE_CODE (*node) != TYPE_DECL)
1684 {
1685 warning ("%qs attribute only applies to functions",
1686 IDENTIFIER_POINTER (name));
1687 *no_add_attrs = true;
1688 }
1689 else
1690 {
1691 if (is_attribute_p ("fastcall", name))
1692 {
1693 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1694 {
1695 error ("fastcall and stdcall attributes are not compatible");
1696 }
1697 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1698 {
1699 error ("fastcall and regparm attributes are not compatible");
1700 }
1701 }
1702 else if (is_attribute_p ("stdcall", name))
1703 {
1704 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1705 {
1706 error ("fastcall and stdcall attributes are not compatible");
1707 }
1708 }
1709 }
1710
1711 if (TARGET_64BIT)
1712 {
1713 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1714 *no_add_attrs = true;
1715 }
1716
1717 return NULL_TREE;
1718 }
1719
1720 /* Handle a "regparm" attribute;
1721 arguments as in struct attribute_spec.handler. */
1722 static tree
1723 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1724 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1725 {
1726 if (TREE_CODE (*node) != FUNCTION_TYPE
1727 && TREE_CODE (*node) != METHOD_TYPE
1728 && TREE_CODE (*node) != FIELD_DECL
1729 && TREE_CODE (*node) != TYPE_DECL)
1730 {
1731 warning ("%qs attribute only applies to functions",
1732 IDENTIFIER_POINTER (name));
1733 *no_add_attrs = true;
1734 }
1735 else
1736 {
1737 tree cst;
1738
1739 cst = TREE_VALUE (args);
1740 if (TREE_CODE (cst) != INTEGER_CST)
1741 {
1742 warning ("%qs attribute requires an integer constant argument",
1743 IDENTIFIER_POINTER (name));
1744 *no_add_attrs = true;
1745 }
1746 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1747 {
1748 warning ("argument to %qs attribute larger than %d",
1749 IDENTIFIER_POINTER (name), REGPARM_MAX);
1750 *no_add_attrs = true;
1751 }
1752
1753 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1754 {
1755 error ("fastcall and regparm attributes are not compatible");
1756 }
1757 }
1758
1759 return NULL_TREE;
1760 }
1761
1762 /* Return 0 if the attributes for two types are incompatible, 1 if they
1763 are compatible, and 2 if they are nearly compatible (which causes a
1764 warning to be generated). */
1765
1766 static int
1767 ix86_comp_type_attributes (tree type1, tree type2)
1768 {
1769 /* Check for mismatch of non-default calling convention. */
1770 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1771
1772 if (TREE_CODE (type1) != FUNCTION_TYPE)
1773 return 1;
1774
1775 /* Check for mismatched fastcall types */
1776 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1777 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1778 return 0;
1779
1780 /* Check for mismatched return types (cdecl vs stdcall). */
1781 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1782 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1783 return 0;
1784 if (ix86_function_regparm (type1, NULL)
1785 != ix86_function_regparm (type2, NULL))
1786 return 0;
1787 return 1;
1788 }
1789 \f
1790 /* Return the regparm value for a function with the indicated TYPE and DECL.
1791 DECL may be NULL when calling function indirectly
1792 or considering a libcall. */
1793
1794 static int
1795 ix86_function_regparm (tree type, tree decl)
1796 {
1797 tree attr;
1798 int regparm = ix86_regparm;
1799 bool user_convention = false;
1800
1801 if (!TARGET_64BIT)
1802 {
1803 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1804 if (attr)
1805 {
1806 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1807 user_convention = true;
1808 }
1809
1810 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1811 {
1812 regparm = 2;
1813 user_convention = true;
1814 }
1815
1816 /* Use register calling convention for local functions when possible. */
1817 if (!TARGET_64BIT && !user_convention && decl
1818 && flag_unit_at_a_time && !profile_flag)
1819 {
1820 struct cgraph_local_info *i = cgraph_local_info (decl);
1821 if (i && i->local)
1822 {
1823 /* We can't use regparm(3) for nested functions as these use
1824 the static chain pointer in the third argument. */
1825 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1826 regparm = 2;
1827 else
1828 regparm = 3;
1829 }
1830 }
1831 }
1832 return regparm;
1833 }
1834
1835 /* Return true if EAX is live at the start of the function. Used by
1836 ix86_expand_prologue to determine if we need special help before
1837 calling allocate_stack_worker. */
1838
1839 static bool
1840 ix86_eax_live_at_start_p (void)
1841 {
1842 /* Cheat. Don't bother working forward from ix86_function_regparm
1843 to the function type to whether an actual argument is located in
1844 eax. Instead just look at cfg info, which is still close enough
1845 to correct at this point. This gives false positives for broken
1846 functions that might use uninitialized data that happens to be
1847 allocated in eax, but who cares? */
1848 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1849 }
1850
1851 /* Value is the number of bytes of arguments automatically
1852 popped when returning from a subroutine call.
1853 FUNDECL is the declaration node of the function (as a tree),
1854 FUNTYPE is the data type of the function (as a tree),
1855 or for a library call it is an identifier node for the subroutine name.
1856 SIZE is the number of bytes of arguments passed on the stack.
1857
1858 On the 80386, the RTD insn may be used to pop them if the number
1859 of args is fixed, but if the number is variable then the caller
1860 must pop them all. RTD can't be used for library calls now
1861 because the library is compiled with the Unix compiler.
1862 Use of RTD is a selectable option, since it is incompatible with
1863 standard Unix calling sequences. If the option is not selected,
1864 the caller must always pop the args.
1865
1866 The attribute stdcall is equivalent to RTD on a per module basis. */
1867
1868 int
1869 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1870 {
1871 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1872
1873 /* Cdecl functions override -mrtd, and never pop the stack. */
1874 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1875
1876 /* Stdcall and fastcall functions will pop the stack if not
1877 variable args. */
1878 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1879 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1880 rtd = 1;
1881
1882 if (rtd
1883 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1884 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1885 == void_type_node)))
1886 return size;
1887 }
1888
1889 /* Lose any fake structure return argument if it is passed on the stack. */
1890 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1891 && !TARGET_64BIT
1892 && !KEEP_AGGREGATE_RETURN_POINTER)
1893 {
1894 int nregs = ix86_function_regparm (funtype, fundecl);
1895
1896 if (!nregs)
1897 return GET_MODE_SIZE (Pmode);
1898 }
1899
1900 return 0;
1901 }
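/* For illustration (hypothetical user code, not part of GCC): given

     void __attribute__ ((stdcall)) h (int a, int b);

   both arguments are passed on the stack and ix86_return_pops_args reports
   8 bytes, so h returns with "ret $8"; a plain cdecl function (absent -mrtd)
   would return 0 and leave the cleanup to the caller.  */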
1902 \f
1903 /* Argument support functions. */
1904
1905 /* Return true when register may be used to pass function parameters. */
1906 bool
1907 ix86_function_arg_regno_p (int regno)
1908 {
1909 int i;
1910 if (!TARGET_64BIT)
1911 return (regno < REGPARM_MAX
1912 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1913 if (SSE_REGNO_P (regno) && TARGET_SSE)
1914 return true;
1915 /* RAX is used as hidden argument to va_arg functions. */
1916 if (!regno)
1917 return true;
1918 for (i = 0; i < REGPARM_MAX; i++)
1919 if (regno == x86_64_int_parameter_registers[i])
1920 return true;
1921 return false;
1922 }
1923
1924 /* Return true if we do not know how to pass TYPE solely in registers. */
1925
1926 static bool
1927 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1928 {
1929 if (must_pass_in_stack_var_size_or_pad (mode, type))
1930 return true;
1931 return (!TARGET_64BIT && type && mode == TImode);
1932 }
1933
1934 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1935 for a call to a function whose data type is FNTYPE.
1936 For a library call, FNTYPE is 0. */
1937
1938 void
1939 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1940 tree fntype, /* tree ptr for function decl */
1941 rtx libname, /* SYMBOL_REF of library name or 0 */
1942 tree fndecl)
1943 {
1944 static CUMULATIVE_ARGS zero_cum;
1945 tree param, next_param;
1946
1947 if (TARGET_DEBUG_ARG)
1948 {
1949 fprintf (stderr, "\ninit_cumulative_args (");
1950 if (fntype)
1951 fprintf (stderr, "fntype code = %s, ret code = %s",
1952 tree_code_name[(int) TREE_CODE (fntype)],
1953 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1954 else
1955 fprintf (stderr, "no fntype");
1956
1957 if (libname)
1958 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1959 }
1960
1961 *cum = zero_cum;
1962
1963 /* Set up the number of registers to use for passing arguments. */
1964 if (fntype)
1965 cum->nregs = ix86_function_regparm (fntype, fndecl);
1966 else
1967 cum->nregs = ix86_regparm;
1968 if (TARGET_SSE)
1969 cum->sse_nregs = SSE_REGPARM_MAX;
1970 if (TARGET_MMX)
1971 cum->mmx_nregs = MMX_REGPARM_MAX;
1972 cum->warn_sse = true;
1973 cum->warn_mmx = true;
1974 cum->maybe_vaarg = false;
1975
1976 /* Use ecx and edx registers if function has fastcall attribute */
1977 if (fntype && !TARGET_64BIT)
1978 {
1979 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1980 {
1981 cum->nregs = 2;
1982 cum->fastcall = 1;
1983 }
1984 }
1985
1986 /* Determine if this function has variable arguments. This is
1987 indicated by the last argument being 'void_type_node' if there
1988 are no variable arguments. If there are variable arguments, then
1989 we won't pass anything in registers in 32-bit mode. */
1990
1991 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1992 {
1993 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1994 param != 0; param = next_param)
1995 {
1996 next_param = TREE_CHAIN (param);
1997 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1998 {
1999 if (!TARGET_64BIT)
2000 {
2001 cum->nregs = 0;
2002 cum->sse_nregs = 0;
2003 cum->mmx_nregs = 0;
2004 cum->warn_sse = 0;
2005 cum->warn_mmx = 0;
2006 cum->fastcall = 0;
2007 }
2008 cum->maybe_vaarg = true;
2009 }
2010 }
2011 }
2012 if ((!fntype && !libname)
2013 || (fntype && !TYPE_ARG_TYPES (fntype)))
2014 cum->maybe_vaarg = 1;
2015
2016 if (TARGET_DEBUG_ARG)
2017 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2018
2019 return;
2020 }
2021
2022 /* x86-64 register passing implementation. See the x86-64 PS ABI for details.
2023 The goal of this code is to classify each eightbyte of an incoming argument
2024 by register class and assign registers accordingly. */
2025
2026 /* Return the union class of CLASS1 and CLASS2.
2027 See the x86-64 PS ABI for details. */
2028
2029 static enum x86_64_reg_class
2030 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2031 {
2032 /* Rule #1: If both classes are equal, this is the resulting class. */
2033 if (class1 == class2)
2034 return class1;
2035
2036 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2037 the other class. */
2038 if (class1 == X86_64_NO_CLASS)
2039 return class2;
2040 if (class2 == X86_64_NO_CLASS)
2041 return class1;
2042
2043 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2044 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2045 return X86_64_MEMORY_CLASS;
2046
2047 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2048 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2049 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2050 return X86_64_INTEGERSI_CLASS;
2051 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2052 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2053 return X86_64_INTEGER_CLASS;
2054
2055 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2056 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2057 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2058 return X86_64_MEMORY_CLASS;
2059
2060 /* Rule #6: Otherwise class SSE is used. */
2061 return X86_64_SSE_CLASS;
2062 }
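/* For illustration: merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS
   yields X86_64_INTEGERSI_CLASS by rule #4, while merging X86_64_SSE_CLASS
   with X86_64_X87_CLASS yields X86_64_MEMORY_CLASS by rule #5.  */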
2063
2064 /* Classify the argument of type TYPE and mode MODE.
2065 CLASSES will be filled by the register class used to pass each word
2066 of the operand. The number of words is returned. In case the parameter
2067 should be passed in memory, 0 is returned. As a special case for zero
2068 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2069
2070 BIT_OFFSET is used internally for handling records and specifies the
2071 offset in bits modulo 256, to avoid overflow cases.
2072
2073 See the x86-64 PS ABI for details.
2074 */
2075
2076 static int
2077 classify_argument (enum machine_mode mode, tree type,
2078 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2079 {
2080 HOST_WIDE_INT bytes =
2081 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2082 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2083
2084 /* Variable sized entities are always passed/returned in memory. */
2085 if (bytes < 0)
2086 return 0;
2087
2088 if (mode != VOIDmode
2089 && targetm.calls.must_pass_in_stack (mode, type))
2090 return 0;
2091
2092 if (type && AGGREGATE_TYPE_P (type))
2093 {
2094 int i;
2095 tree field;
2096 enum x86_64_reg_class subclasses[MAX_CLASSES];
2097
2098 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2099 if (bytes > 16)
2100 return 0;
2101
2102 for (i = 0; i < words; i++)
2103 classes[i] = X86_64_NO_CLASS;
2104
2105 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2106 signal the memory class, so handle this as a special case. */
2107 if (!words)
2108 {
2109 classes[0] = X86_64_NO_CLASS;
2110 return 1;
2111 }
2112
2113 /* Classify each field of record and merge classes. */
2114 if (TREE_CODE (type) == RECORD_TYPE)
2115 {
2116 /* For classes first merge in the field of the subclasses. */
2117 if (TYPE_BINFO (type))
2118 {
2119 tree binfo, base_binfo;
2120 int i;
2121
2122 for (binfo = TYPE_BINFO (type), i = 0;
2123 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2124 {
2125 int num;
2126 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2127 tree type = BINFO_TYPE (base_binfo);
2128
2129 num = classify_argument (TYPE_MODE (type),
2130 type, subclasses,
2131 (offset + bit_offset) % 256);
2132 if (!num)
2133 return 0;
2134 for (i = 0; i < num; i++)
2135 {
2136 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2137 classes[i + pos] =
2138 merge_classes (subclasses[i], classes[i + pos]);
2139 }
2140 }
2141 }
2142 /* And now merge the fields of the structure. */
2143 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2144 {
2145 if (TREE_CODE (field) == FIELD_DECL)
2146 {
2147 int num;
2148
2149 /* Bitfields are always classified as integer. Handle them
2150 early, since later code would consider them to be
2151 misaligned integers. */
2152 if (DECL_BIT_FIELD (field))
2153 {
2154 for (i = int_bit_position (field) / 8 / 8;
2155 i < (int_bit_position (field)
2156 + tree_low_cst (DECL_SIZE (field), 0)
2157 + 63) / 8 / 8; i++)
2158 classes[i] =
2159 merge_classes (X86_64_INTEGER_CLASS,
2160 classes[i]);
2161 }
2162 else
2163 {
2164 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2165 TREE_TYPE (field), subclasses,
2166 (int_bit_position (field)
2167 + bit_offset) % 256);
2168 if (!num)
2169 return 0;
2170 for (i = 0; i < num; i++)
2171 {
2172 int pos =
2173 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2174 classes[i + pos] =
2175 merge_classes (subclasses[i], classes[i + pos]);
2176 }
2177 }
2178 }
2179 }
2180 }
2181 /* Arrays are handled as small records. */
2182 else if (TREE_CODE (type) == ARRAY_TYPE)
2183 {
2184 int num;
2185 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2186 TREE_TYPE (type), subclasses, bit_offset);
2187 if (!num)
2188 return 0;
2189
2190 /* The partial classes are now full classes. */
2191 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2192 subclasses[0] = X86_64_SSE_CLASS;
2193 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2194 subclasses[0] = X86_64_INTEGER_CLASS;
2195
2196 for (i = 0; i < words; i++)
2197 classes[i] = subclasses[i % num];
2198 }
2199 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2200 else if (TREE_CODE (type) == UNION_TYPE
2201 || TREE_CODE (type) == QUAL_UNION_TYPE)
2202 {
2203 /* For classes first merge in the field of the subclasses. */
2204 if (TYPE_BINFO (type))
2205 {
2206 tree binfo, base_binfo;
2207 int i;
2208
2209 for (binfo = TYPE_BINFO (type), i = 0;
2210 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2211 {
2212 int num;
2213 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2214 tree type = BINFO_TYPE (base_binfo);
2215
2216 num = classify_argument (TYPE_MODE (type),
2217 type, subclasses,
2218 (offset + (bit_offset % 64)) % 256);
2219 if (!num)
2220 return 0;
2221 for (i = 0; i < num; i++)
2222 {
2223 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2224 classes[i + pos] =
2225 merge_classes (subclasses[i], classes[i + pos]);
2226 }
2227 }
2228 }
2229 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2230 {
2231 if (TREE_CODE (field) == FIELD_DECL)
2232 {
2233 int num;
2234 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2235 TREE_TYPE (field), subclasses,
2236 bit_offset);
2237 if (!num)
2238 return 0;
2239 for (i = 0; i < num; i++)
2240 classes[i] = merge_classes (subclasses[i], classes[i]);
2241 }
2242 }
2243 }
2244 else if (TREE_CODE (type) == SET_TYPE)
2245 {
2246 if (bytes <= 4)
2247 {
2248 classes[0] = X86_64_INTEGERSI_CLASS;
2249 return 1;
2250 }
2251 else if (bytes <= 8)
2252 {
2253 classes[0] = X86_64_INTEGER_CLASS;
2254 return 1;
2255 }
2256 else if (bytes <= 12)
2257 {
2258 classes[0] = X86_64_INTEGER_CLASS;
2259 classes[1] = X86_64_INTEGERSI_CLASS;
2260 return 2;
2261 }
2262 else
2263 {
2264 classes[0] = X86_64_INTEGER_CLASS;
2265 classes[1] = X86_64_INTEGER_CLASS;
2266 return 2;
2267 }
2268 }
2269 else
2270 abort ();
2271
2272 /* Final merger cleanup. */
2273 for (i = 0; i < words; i++)
2274 {
2275 /* If one class is MEMORY, everything should be passed in
2276 memory. */
2277 if (classes[i] == X86_64_MEMORY_CLASS)
2278 return 0;
2279
2280 /* The X86_64_SSEUP_CLASS should be always preceded by
2281 X86_64_SSE_CLASS. */
2282 if (classes[i] == X86_64_SSEUP_CLASS
2283 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2284 classes[i] = X86_64_SSE_CLASS;
2285
2286 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2287 if (classes[i] == X86_64_X87UP_CLASS
2288 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2289 classes[i] = X86_64_SSE_CLASS;
2290 }
2291 return words;
2292 }
2293
2294 /* Compute the alignment needed. We align all types to their natural
2295 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
2296 if (mode != VOIDmode && mode != BLKmode)
2297 {
2298 int mode_alignment = GET_MODE_BITSIZE (mode);
2299
2300 if (mode == XFmode)
2301 mode_alignment = 128;
2302 else if (mode == XCmode)
2303 mode_alignment = 256;
2304 if (COMPLEX_MODE_P (mode))
2305 mode_alignment /= 2;
2306 /* Misaligned fields are always returned in memory. */
2307 if (bit_offset % mode_alignment)
2308 return 0;
2309 }
2310
2311 /* For V1xx modes, just use the base mode. */
2312 if (VECTOR_MODE_P (mode)
2313 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2314 mode = GET_MODE_INNER (mode);
2315
2316 /* Classification of atomic types. */
2317 switch (mode)
2318 {
2319 case DImode:
2320 case SImode:
2321 case HImode:
2322 case QImode:
2323 case CSImode:
2324 case CHImode:
2325 case CQImode:
2326 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2327 classes[0] = X86_64_INTEGERSI_CLASS;
2328 else
2329 classes[0] = X86_64_INTEGER_CLASS;
2330 return 1;
2331 case CDImode:
2332 case TImode:
2333 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2334 return 2;
2335 case CTImode:
2336 return 0;
2337 case SFmode:
2338 if (!(bit_offset % 64))
2339 classes[0] = X86_64_SSESF_CLASS;
2340 else
2341 classes[0] = X86_64_SSE_CLASS;
2342 return 1;
2343 case DFmode:
2344 classes[0] = X86_64_SSEDF_CLASS;
2345 return 1;
2346 case XFmode:
2347 classes[0] = X86_64_X87_CLASS;
2348 classes[1] = X86_64_X87UP_CLASS;
2349 return 2;
2350 case TFmode:
2351 classes[0] = X86_64_SSE_CLASS;
2352 classes[1] = X86_64_SSEUP_CLASS;
2353 return 2;
2354 case SCmode:
2355 classes[0] = X86_64_SSE_CLASS;
2356 return 1;
2357 case DCmode:
2358 classes[0] = X86_64_SSEDF_CLASS;
2359 classes[1] = X86_64_SSEDF_CLASS;
2360 return 2;
2361 case XCmode:
2362 case TCmode:
2363 /* These modes are larger than 16 bytes. */
2364 return 0;
2365 case V4SFmode:
2366 case V4SImode:
2367 case V16QImode:
2368 case V8HImode:
2369 case V2DFmode:
2370 case V2DImode:
2371 classes[0] = X86_64_SSE_CLASS;
2372 classes[1] = X86_64_SSEUP_CLASS;
2373 return 2;
2374 case V2SFmode:
2375 case V2SImode:
2376 case V4HImode:
2377 case V8QImode:
2378 classes[0] = X86_64_SSE_CLASS;
2379 return 1;
2380 case BLKmode:
2381 case VOIDmode:
2382 return 0;
2383 default:
2384 if (VECTOR_MODE_P (mode))
2385 {
2386 if (bytes > 16)
2387 return 0;
2388 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2389 {
2390 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2391 classes[0] = X86_64_INTEGERSI_CLASS;
2392 else
2393 classes[0] = X86_64_INTEGER_CLASS;
2394 classes[1] = X86_64_INTEGER_CLASS;
2395 return 1 + (bytes > 8);
2396 }
2397 }
2398 abort ();
2399 }
2400 }
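/* For illustration (a hypothetical argument type, not from these sources):

     struct s { double d; int i; };

   is 16 bytes, so classify_argument splits it into two eightbytes: bytes 0-7
   (the double) get X86_64_SSEDF_CLASS and bytes 8-15 (the int plus padding)
   get X86_64_INTEGER_CLASS, so the whole struct can be passed in one SSE
   register and one general-purpose register.  */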
2401
2402 /* Examine the argument and set the number of registers required in each
2403 class. Return 0 iff the parameter should be passed in memory. */
2404 static int
2405 examine_argument (enum machine_mode mode, tree type, int in_return,
2406 int *int_nregs, int *sse_nregs)
2407 {
2408 enum x86_64_reg_class class[MAX_CLASSES];
2409 int n = classify_argument (mode, type, class, 0);
2410
2411 *int_nregs = 0;
2412 *sse_nregs = 0;
2413 if (!n)
2414 return 0;
2415 for (n--; n >= 0; n--)
2416 switch (class[n])
2417 {
2418 case X86_64_INTEGER_CLASS:
2419 case X86_64_INTEGERSI_CLASS:
2420 (*int_nregs)++;
2421 break;
2422 case X86_64_SSE_CLASS:
2423 case X86_64_SSESF_CLASS:
2424 case X86_64_SSEDF_CLASS:
2425 (*sse_nregs)++;
2426 break;
2427 case X86_64_NO_CLASS:
2428 case X86_64_SSEUP_CLASS:
2429 break;
2430 case X86_64_X87_CLASS:
2431 case X86_64_X87UP_CLASS:
2432 if (!in_return)
2433 return 0;
2434 break;
2435 case X86_64_MEMORY_CLASS:
2436 abort ();
2437 }
2438 return 1;
2439 }
2440 /* Construct the container for the argument as used by the GCC interface.
2441 See FUNCTION_ARG for a detailed description. */
2442 static rtx
2443 construct_container (enum machine_mode mode, tree type, int in_return,
2444 int nintregs, int nsseregs, const int * intreg,
2445 int sse_regno)
2446 {
2447 enum machine_mode tmpmode;
2448 int bytes =
2449 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2450 enum x86_64_reg_class class[MAX_CLASSES];
2451 int n;
2452 int i;
2453 int nexps = 0;
2454 int needed_sseregs, needed_intregs;
2455 rtx exp[MAX_CLASSES];
2456 rtx ret;
2457
2458 n = classify_argument (mode, type, class, 0);
2459 if (TARGET_DEBUG_ARG)
2460 {
2461 if (!n)
2462 fprintf (stderr, "Memory class\n");
2463 else
2464 {
2465 fprintf (stderr, "Classes:");
2466 for (i = 0; i < n; i++)
2467 {
2468 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2469 }
2470 fprintf (stderr, "\n");
2471 }
2472 }
2473 if (!n)
2474 return NULL;
2475 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2476 return NULL;
2477 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2478 return NULL;
2479
2480 /* First construct simple cases. Avoid SCmode, since we want to use
2481 a single register to pass this type. */
2482 if (n == 1 && mode != SCmode)
2483 switch (class[0])
2484 {
2485 case X86_64_INTEGER_CLASS:
2486 case X86_64_INTEGERSI_CLASS:
2487 return gen_rtx_REG (mode, intreg[0]);
2488 case X86_64_SSE_CLASS:
2489 case X86_64_SSESF_CLASS:
2490 case X86_64_SSEDF_CLASS:
2491 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2492 case X86_64_X87_CLASS:
2493 return gen_rtx_REG (mode, FIRST_STACK_REG);
2494 case X86_64_NO_CLASS:
2495 /* Zero sized array, struct or class. */
2496 return NULL;
2497 default:
2498 abort ();
2499 }
2500 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2501 && mode != BLKmode)
2502 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2503 if (n == 2
2504 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2505 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2506 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2507 && class[1] == X86_64_INTEGER_CLASS
2508 && (mode == CDImode || mode == TImode || mode == TFmode)
2509 && intreg[0] + 1 == intreg[1])
2510 return gen_rtx_REG (mode, intreg[0]);
2511 if (n == 4
2512 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2513 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2514 && mode != BLKmode)
2515 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2516
2517 /* Otherwise figure out the entries of the PARALLEL. */
2518 for (i = 0; i < n; i++)
2519 {
2520 switch (class[i])
2521 {
2522 case X86_64_NO_CLASS:
2523 break;
2524 case X86_64_INTEGER_CLASS:
2525 case X86_64_INTEGERSI_CLASS:
2526 /* Merge TImodes on aligned occasions here too. */
2527 if (i * 8 + 8 > bytes)
2528 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2529 else if (class[i] == X86_64_INTEGERSI_CLASS)
2530 tmpmode = SImode;
2531 else
2532 tmpmode = DImode;
2533 /* We may have requested a size (such as 24 bits) for which no integer mode exists; use DImode then. */
2534 if (tmpmode == BLKmode)
2535 tmpmode = DImode;
2536 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2537 gen_rtx_REG (tmpmode, *intreg),
2538 GEN_INT (i*8));
2539 intreg++;
2540 break;
2541 case X86_64_SSESF_CLASS:
2542 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2543 gen_rtx_REG (SFmode,
2544 SSE_REGNO (sse_regno)),
2545 GEN_INT (i*8));
2546 sse_regno++;
2547 break;
2548 case X86_64_SSEDF_CLASS:
2549 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2550 gen_rtx_REG (DFmode,
2551 SSE_REGNO (sse_regno)),
2552 GEN_INT (i*8));
2553 sse_regno++;
2554 break;
2555 case X86_64_SSE_CLASS:
2556 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2557 tmpmode = TImode;
2558 else
2559 tmpmode = DImode;
2560 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2561 gen_rtx_REG (tmpmode,
2562 SSE_REGNO (sse_regno)),
2563 GEN_INT (i*8));
2564 if (tmpmode == TImode)
2565 i++;
2566 sse_regno++;
2567 break;
2568 default:
2569 abort ();
2570 }
2571 }
2572 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2573 for (i = 0; i < nexps; i++)
2574 XVECEXP (ret, 0, i) = exp [i];
2575 return ret;
2576 }
2577
2578 /* Update the data in CUM to advance over an argument
2579 of mode MODE and data type TYPE.
2580 (TYPE is null for libcalls where that information may not be available.) */
2581
2582 void
2583 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2584 enum machine_mode mode, /* current arg mode */
2585 tree type, /* type of the argument or 0 if lib support */
2586 int named) /* whether or not the argument was named */
2587 {
2588 int bytes =
2589 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2590 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2591
2592 if (TARGET_DEBUG_ARG)
2593 fprintf (stderr,
2594 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2595 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2596 if (TARGET_64BIT)
2597 {
2598 int int_nregs, sse_nregs;
2599 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2600 cum->words += words;
2601 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2602 {
2603 cum->nregs -= int_nregs;
2604 cum->sse_nregs -= sse_nregs;
2605 cum->regno += int_nregs;
2606 cum->sse_regno += sse_nregs;
2607 }
2608 else
2609 cum->words += words;
2610 }
2611 else
2612 {
2613 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2614 && (!type || !AGGREGATE_TYPE_P (type)))
2615 {
2616 cum->sse_words += words;
2617 cum->sse_nregs -= 1;
2618 cum->sse_regno += 1;
2619 if (cum->sse_nregs <= 0)
2620 {
2621 cum->sse_nregs = 0;
2622 cum->sse_regno = 0;
2623 }
2624 }
2625 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2626 && (!type || !AGGREGATE_TYPE_P (type)))
2627 {
2628 cum->mmx_words += words;
2629 cum->mmx_nregs -= 1;
2630 cum->mmx_regno += 1;
2631 if (cum->mmx_nregs <= 0)
2632 {
2633 cum->mmx_nregs = 0;
2634 cum->mmx_regno = 0;
2635 }
2636 }
2637 else
2638 {
2639 cum->words += words;
2640 cum->nregs -= words;
2641 cum->regno += words;
2642
2643 if (cum->nregs <= 0)
2644 {
2645 cum->nregs = 0;
2646 cum->regno = 0;
2647 }
2648 }
2649 }
2650 return;
2651 }
2652
2653 /* Define where to put the arguments to a function.
2654 Value is zero to push the argument on the stack,
2655 or a hard register in which to store the argument.
2656
2657 MODE is the argument's machine mode.
2658 TYPE is the data type of the argument (as a tree).
2659 This is null for libcalls where that information may
2660 not be available.
2661 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2662 the preceding args and about the function being called.
2663 NAMED is nonzero if this argument is a named parameter
2664 (otherwise it is an extra parameter matching an ellipsis). */
2665
2666 rtx
2667 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2668 enum machine_mode mode, /* current arg mode */
2669 tree type, /* type of the argument or 0 if lib support */
2670 int named) /* != 0 for normal args, == 0 for ... args */
2671 {
2672 rtx ret = NULL_RTX;
2673 int bytes =
2674 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2675 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2676 static bool warnedsse, warnedmmx;
2677
2678 /* To simplify the code below, represent vector types with a vector mode
2679 even if MMX/SSE are not active. */
2680 if (type
2681 && TREE_CODE (type) == VECTOR_TYPE
2682 && (bytes == 8 || bytes == 16)
2683 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2684 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2685 {
2686 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2687 enum machine_mode newmode
2688 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2689 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2690
2691 /* Get the mode which has this inner mode and number of units. */
2692 for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2693 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2694 && GET_MODE_INNER (newmode) == innermode)
2695 {
2696 mode = newmode;
2697 break;
2698 }
2699 }
2700
2701 /* Handle a hidden AL argument containing the number of SSE registers used
2702 by varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2703 avoid any AL settings. */
2704 if (mode == VOIDmode)
2705 {
2706 if (TARGET_64BIT)
2707 return GEN_INT (cum->maybe_vaarg
2708 ? (cum->sse_nregs < 0
2709 ? SSE_REGPARM_MAX
2710 : cum->sse_regno)
2711 : -1);
2712 else
2713 return constm1_rtx;
2714 }
2715 if (TARGET_64BIT)
2716 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2717 &x86_64_int_parameter_registers [cum->regno],
2718 cum->sse_regno);
2719 else
2720 switch (mode)
2721 {
2722 /* For now, pass fp/complex values on the stack. */
2723 default:
2724 break;
2725
2726 case BLKmode:
2727 if (bytes < 0)
2728 break;
2729 /* FALLTHRU */
2730 case DImode:
2731 case SImode:
2732 case HImode:
2733 case QImode:
2734 if (words <= cum->nregs)
2735 {
2736 int regno = cum->regno;
2737
2738 /* Fastcall allocates the first two DWORD (SImode) or
2739 smaller arguments to ECX and EDX. */
2740 if (cum->fastcall)
2741 {
2742 if (mode == BLKmode || mode == DImode)
2743 break;
2744
2745 /* ECX not EAX is the first allocated register. */
2746 if (regno == 0)
2747 regno = 2;
2748 }
2749 ret = gen_rtx_REG (mode, regno);
2750 }
2751 break;
2752 case TImode:
2753 case V16QImode:
2754 case V8HImode:
2755 case V4SImode:
2756 case V2DImode:
2757 case V4SFmode:
2758 case V2DFmode:
2759 if (!type || !AGGREGATE_TYPE_P (type))
2760 {
2761 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2762 {
2763 warnedsse = true;
2764 warning ("SSE vector argument without SSE enabled "
2765 "changes the ABI");
2766 }
2767 if (cum->sse_nregs)
2768 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2769 }
2770 break;
2771 case V8QImode:
2772 case V4HImode:
2773 case V2SImode:
2774 case V2SFmode:
2775 if (!type || !AGGREGATE_TYPE_P (type))
2776 {
2777 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2778 {
2779 warnedmmx = true;
2780 warning ("MMX vector argument without MMX enabled "
2781 "changes the ABI");
2782 }
2783 if (cum->mmx_nregs)
2784 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2785 }
2786 break;
2787 }
2788
2789 if (TARGET_DEBUG_ARG)
2790 {
2791 fprintf (stderr,
2792 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2793 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2794
2795 if (ret)
2796 print_simple_rtl (stderr, ret);
2797 else
2798 fprintf (stderr, ", stack");
2799
2800 fprintf (stderr, " )\n");
2801 }
2802
2803 return ret;
2804 }
2805
2806 /* A C expression that indicates when an argument must be passed by
2807 reference. If nonzero for an argument, a copy of that argument is
2808 made in memory and a pointer to the argument is passed instead of
2809 the argument itself. The pointer is passed in whatever way is
2810 appropriate for passing a pointer to that type. */
2811
2812 static bool
2813 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2814 enum machine_mode mode ATTRIBUTE_UNUSED,
2815 tree type, bool named ATTRIBUTE_UNUSED)
2816 {
2817 if (!TARGET_64BIT)
2818 return 0;
2819
2820 if (type && int_size_in_bytes (type) == -1)
2821 {
2822 if (TARGET_DEBUG_ARG)
2823 fprintf (stderr, "function_arg_pass_by_reference\n");
2824 return 1;
2825 }
2826
2827 return 0;
2828 }
2829
2830 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2831 passing ABI. Only called if TARGET_SSE. */
2832 static bool
2833 contains_128bit_aligned_vector_p (tree type)
2834 {
2835 enum machine_mode mode = TYPE_MODE (type);
2836 if (SSE_REG_MODE_P (mode)
2837 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2838 return true;
2839 if (TYPE_ALIGN (type) < 128)
2840 return false;
2841
2842 if (AGGREGATE_TYPE_P (type))
2843 {
2844 /* Walk the aggregates recursively. */
2845 if (TREE_CODE (type) == RECORD_TYPE
2846 || TREE_CODE (type) == UNION_TYPE
2847 || TREE_CODE (type) == QUAL_UNION_TYPE)
2848 {
2849 tree field;
2850
2851 if (TYPE_BINFO (type))
2852 {
2853 tree binfo, base_binfo;
2854 int i;
2855
2856 for (binfo = TYPE_BINFO (type), i = 0;
2857 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2858 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2859 return true;
2860 }
2861 /* And now merge the fields of the structure. */
2862 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2863 {
2864 if (TREE_CODE (field) == FIELD_DECL
2865 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2866 return true;
2867 }
2868 }
2869 /* Just in case some language passes arrays by value. */
2870 else if (TREE_CODE (type) == ARRAY_TYPE)
2871 {
2872 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2873 return true;
2874 }
2875 else
2876 abort ();
2877 }
2878 return false;
2879 }
2880
2881 /* Return the alignment boundary, in bits, of an argument with the
2882 specified mode and type. */
2883
2884 int
2885 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2886 {
2887 int align;
2888 if (type)
2889 align = TYPE_ALIGN (type);
2890 else
2891 align = GET_MODE_ALIGNMENT (mode);
2892 if (align < PARM_BOUNDARY)
2893 align = PARM_BOUNDARY;
2894 if (!TARGET_64BIT)
2895 {
2896 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2897 make an exception for SSE modes since these require 128bit
2898 alignment.
2899
2900 The handling here differs from field_alignment. ICC aligns MMX
2901 arguments to 4 byte boundaries, while structure fields are aligned
2902 to 8 byte boundaries. */
2903 if (!TARGET_SSE)
2904 align = PARM_BOUNDARY;
2905 else if (!type)
2906 {
2907 if (!SSE_REG_MODE_P (mode))
2908 align = PARM_BOUNDARY;
2909 }
2910 else
2911 {
2912 if (!contains_128bit_aligned_vector_p (type))
2913 align = PARM_BOUNDARY;
2914 }
2915 }
2916 if (align > 128)
2917 align = 128;
2918 return align;
2919 }
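/* For illustration: in 32-bit mode a plain int argument stays at
   PARM_BOUNDARY (32 bits), while an argument whose type contains a 128-bit
   vector (e.g. an SSE __m128) is reported as 128-bit aligned, provided SSE
   is enabled.  */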
2920
2921 /* Return true if REGNO is a possible register number for a function value. */
2922 bool
2923 ix86_function_value_regno_p (int regno)
2924 {
2925 if (!TARGET_64BIT)
2926 {
2927 return ((regno) == 0
2928 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2929 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2930 }
2931 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2932 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2933 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2934 }
2935
2936 /* Define how to find the value returned by a function.
2937 VALTYPE is the data type of the value (as a tree).
2938 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2939 otherwise, FUNC is 0. */
2940 rtx
2941 ix86_function_value (tree valtype)
2942 {
2943 if (TARGET_64BIT)
2944 {
2945 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2946 REGPARM_MAX, SSE_REGPARM_MAX,
2947 x86_64_int_return_registers, 0);
2948 /* For zero sized structures, construct_container returns NULL, but we need
2949 to keep the rest of the compiler happy by returning a meaningful value. */
2950 if (!ret)
2951 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2952 return ret;
2953 }
2954 else
2955 return gen_rtx_REG (TYPE_MODE (valtype),
2956 ix86_value_regno (TYPE_MODE (valtype)));
2957 }
2958
2959 /* Return true iff type is returned in memory. */
2960 int
2961 ix86_return_in_memory (tree type)
2962 {
2963 int needed_intregs, needed_sseregs, size;
2964 enum machine_mode mode = TYPE_MODE (type);
2965
2966 if (TARGET_64BIT)
2967 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2968
2969 if (mode == BLKmode)
2970 return 1;
2971
2972 size = int_size_in_bytes (type);
2973
2974 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2975 return 0;
2976
2977 if (VECTOR_MODE_P (mode) || mode == TImode)
2978 {
2979 /* User-created vectors small enough to fit in EAX. */
2980 if (size < 8)
2981 return 0;
2982
2983 /* MMX/3dNow values are returned on the stack, since we've
2984 got to EMMS/FEMMS before returning. */
2985 if (size == 8)
2986 return 1;
2987
2988 /* SSE values are returned in XMM0, except when it doesn't exist. */
2989 if (size == 16)
2990 return (TARGET_SSE ? 0 : 1);
2991 }
2992
2993 if (mode == XFmode)
2994 return 0;
2995
2996 if (size > 12)
2997 return 1;
2998 return 0;
2999 }
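/* For illustration: in 32-bit mode an 8-byte MMX value (e.g. __m64) is
   reported as returned in memory, since the function must EMMS/FEMMS before
   returning, whereas a 16-byte SSE value (e.g. __m128) is returned in XMM0
   when SSE is enabled and in memory otherwise.  */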
3000
3001 /* When returning SSE vector types, we have a choice of either
3002 (1) being ABI incompatible with a -march switch, or
3003 (2) generating an error.
3004 Given no good solution, I think the safest thing is one warning.
3005 The user won't be able to use -Werror, but....
3006
3007 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3008 called in response to actually generating a caller or callee that
3009 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3010 via aggregate_value_p for general type probing from tree-ssa. */
3011
3012 static rtx
3013 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3014 {
3015 static bool warned;
3016
3017 if (!TARGET_SSE && type && !warned)
3018 {
3019 /* Look at the return type of the function, not the function type. */
3020 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3021
3022 if (mode == TImode
3023 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3024 {
3025 warned = true;
3026 warning ("SSE vector return without SSE enabled changes the ABI");
3027 }
3028 }
3029
3030 return NULL;
3031 }
3032
3033 /* Define how to find the value returned by a library function
3034 assuming the value has mode MODE. */
3035 rtx
3036 ix86_libcall_value (enum machine_mode mode)
3037 {
3038 if (TARGET_64BIT)
3039 {
3040 switch (mode)
3041 {
3042 case SFmode:
3043 case SCmode:
3044 case DFmode:
3045 case DCmode:
3046 case TFmode:
3047 return gen_rtx_REG (mode, FIRST_SSE_REG);
3048 case XFmode:
3049 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3050 case XCmode:
3051 case TCmode:
3052 return NULL;
3053 default:
3054 return gen_rtx_REG (mode, 0);
3055 }
3056 }
3057 else
3058 return gen_rtx_REG (mode, ix86_value_regno (mode));
3059 }
3060
3061 /* Given a mode, return the register to use for a return value. */
3062
3063 static int
3064 ix86_value_regno (enum machine_mode mode)
3065 {
3066 /* Floating point return values in %st(0). */
3067 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3068 return FIRST_FLOAT_REG;
3069 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3070 we prevent this case when sse is not available. */
3071 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3072 return FIRST_SSE_REG;
3073 /* Everything else in %eax. */
3074 return 0;
3075 }
3076 \f
3077 /* Create the va_list data type. */
3078
3079 static tree
3080 ix86_build_builtin_va_list (void)
3081 {
3082 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3083
3084 /* For i386 we use a plain pointer to the argument area. */
3085 if (!TARGET_64BIT)
3086 return build_pointer_type (char_type_node);
3087
3088 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3089 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3090
3091 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3092 unsigned_type_node);
3093 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3094 unsigned_type_node);
3095 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3096 ptr_type_node);
3097 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3098 ptr_type_node);
3099
3100 DECL_FIELD_CONTEXT (f_gpr) = record;
3101 DECL_FIELD_CONTEXT (f_fpr) = record;
3102 DECL_FIELD_CONTEXT (f_ovf) = record;
3103 DECL_FIELD_CONTEXT (f_sav) = record;
3104
3105 TREE_CHAIN (record) = type_decl;
3106 TYPE_NAME (record) = type_decl;
3107 TYPE_FIELDS (record) = f_gpr;
3108 TREE_CHAIN (f_gpr) = f_fpr;
3109 TREE_CHAIN (f_fpr) = f_ovf;
3110 TREE_CHAIN (f_ovf) = f_sav;
3111
3112 layout_type (record);
3113
3114 /* The correct type is an array type of one element. */
3115 return build_array_type (record, build_index_type (size_zero_node));
3116 }
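/* For reference, the record built above matches the va_list layout described
   by the x86-64 ABI; in (hypothetical) C source it would look roughly like

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];
*/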
3117
3118 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3119
3120 static void
3121 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3122 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3123 int no_rtl)
3124 {
3125 CUMULATIVE_ARGS next_cum;
3126 rtx save_area = NULL_RTX, mem;
3127 rtx label;
3128 rtx label_ref;
3129 rtx tmp_reg;
3130 rtx nsse_reg;
3131 int set;
3132 tree fntype;
3133 int stdarg_p;
3134 int i;
3135
3136 if (!TARGET_64BIT)
3137 return;
3138
3139 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3140 ix86_save_varrargs_registers = 1;
3141
3142 cfun->stack_alignment_needed = 128;
3143
3144 fntype = TREE_TYPE (current_function_decl);
3145 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3146 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3147 != void_type_node));
3148
3149 /* For varargs, we do not want to skip the dummy va_dcl argument.
3150 For stdargs, we do want to skip the last named argument. */
3151 next_cum = *cum;
3152 if (stdarg_p)
3153 function_arg_advance (&next_cum, mode, type, 1);
3154
3155 if (!no_rtl)
3156 save_area = frame_pointer_rtx;
3157
3158 set = get_varargs_alias_set ();
3159
3160 for (i = next_cum.regno; i < ix86_regparm; i++)
3161 {
3162 mem = gen_rtx_MEM (Pmode,
3163 plus_constant (save_area, i * UNITS_PER_WORD));
3164 set_mem_alias_set (mem, set);
3165 emit_move_insn (mem, gen_rtx_REG (Pmode,
3166 x86_64_int_parameter_registers[i]));
3167 }
3168
3169 if (next_cum.sse_nregs)
3170 {
3171 /* Now emit code to save SSE registers. The AX parameter contains the
3172 number of SSE parameter registers used to call this function. We use the
3173 sse_prologue_save insn template, which produces a computed jump across
3174 the SSE saves. We need some preparation work to get this working. */
3175
3176 label = gen_label_rtx ();
3177 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3178
3179 /* Compute the address to jump to:
3180 label - 4*eax + nnamed_sse_arguments*4. */
3181 tmp_reg = gen_reg_rtx (Pmode);
3182 nsse_reg = gen_reg_rtx (Pmode);
3183 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3184 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3185 gen_rtx_MULT (Pmode, nsse_reg,
3186 GEN_INT (4))));
3187 if (next_cum.sse_regno)
3188 emit_move_insn
3189 (nsse_reg,
3190 gen_rtx_CONST (DImode,
3191 gen_rtx_PLUS (DImode,
3192 label_ref,
3193 GEN_INT (next_cum.sse_regno * 4))));
3194 else
3195 emit_move_insn (nsse_reg, label_ref);
3196 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3197
3198 /* Compute the address of the memory block we save into. We always use a
3199 pointer pointing 127 bytes after the first byte to store; this is needed
3200 to keep the instruction size limited to 4 bytes. */
3201 tmp_reg = gen_reg_rtx (Pmode);
3202 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3203 plus_constant (save_area,
3204 8 * REGPARM_MAX + 127)));
3205 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3206 set_mem_alias_set (mem, set);
3207 set_mem_align (mem, BITS_PER_WORD);
3208
3209 /* And finally do the dirty job! */
3210 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3211 GEN_INT (next_cum.sse_regno), label));
3212 }
3213
3214 }
3215
3216 /* Implement va_start. */
3217
3218 void
3219 ix86_va_start (tree valist, rtx nextarg)
3220 {
3221 HOST_WIDE_INT words, n_gpr, n_fpr;
3222 tree f_gpr, f_fpr, f_ovf, f_sav;
3223 tree gpr, fpr, ovf, sav, t;
3224
3225 /* Only 64bit target needs something special. */
3226 if (!TARGET_64BIT)
3227 {
3228 std_expand_builtin_va_start (valist, nextarg);
3229 return;
3230 }
3231
3232 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3233 f_fpr = TREE_CHAIN (f_gpr);
3234 f_ovf = TREE_CHAIN (f_fpr);
3235 f_sav = TREE_CHAIN (f_ovf);
3236
3237 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3238 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3239 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3240 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3241 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3242
3243 /* Count number of gp and fp argument registers used. */
3244 words = current_function_args_info.words;
3245 n_gpr = current_function_args_info.regno;
3246 n_fpr = current_function_args_info.sse_regno;
3247
3248 if (TARGET_DEBUG_ARG)
3249 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3250 (int) words, (int) n_gpr, (int) n_fpr);
3251
3252 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3253 build_int_cst (NULL_TREE, n_gpr * 8));
3254 TREE_SIDE_EFFECTS (t) = 1;
3255 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3256
3257 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3258 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3259 TREE_SIDE_EFFECTS (t) = 1;
3260 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3261
3262 /* Find the overflow area. */
3263 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3264 if (words != 0)
3265 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3266 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3267 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3268 TREE_SIDE_EFFECTS (t) = 1;
3269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3270
3271 /* Find the register save area.
3272 The prologue of the function saves it right above the stack frame. */
3273 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3274 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3275 TREE_SIDE_EFFECTS (t) = 1;
3276 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3277 }
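/* Editorial note: the fields manipulated above correspond to the x86-64
   psABI va_list layout, which is roughly

       typedef struct {
         unsigned int gp_offset;        (gpr above)
         unsigned int fp_offset;        (fpr above)
         void *overflow_arg_area;       (ovf above)
         void *reg_save_area;           (sav above)
       } __va_list_tag;

   so ix86_va_start simply fills in all four fields for the 64-bit case. */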
3278
3279 /* Implement va_arg. */
3280
3281 tree
3282 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3283 {
3284 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3285 tree f_gpr, f_fpr, f_ovf, f_sav;
3286 tree gpr, fpr, ovf, sav, t;
3287 int size, rsize;
3288 tree lab_false, lab_over = NULL_TREE;
3289 tree addr, t2;
3290 rtx container;
3291 int indirect_p = 0;
3292 tree ptrtype;
3293
3294 /* Only the 64-bit target needs something special. */
3295 if (!TARGET_64BIT)
3296 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3297
3298 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3299 f_fpr = TREE_CHAIN (f_gpr);
3300 f_ovf = TREE_CHAIN (f_fpr);
3301 f_sav = TREE_CHAIN (f_ovf);
3302
3303 valist = build_va_arg_indirect_ref (valist);
3304 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3305 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3306 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3307 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3308
3309 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3310 if (indirect_p)
3311 type = build_pointer_type (type);
3312 size = int_size_in_bytes (type);
3313 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3314
3315 container = construct_container (TYPE_MODE (type), type, 0,
3316 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3317 /*
3318 * Pull the value out of the saved registers ...
3319 */
3320
3321 addr = create_tmp_var (ptr_type_node, "addr");
3322 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3323
3324 if (container)
3325 {
3326 int needed_intregs, needed_sseregs;
3327 bool need_temp;
3328 tree int_addr, sse_addr;
3329
3330 lab_false = create_artificial_label ();
3331 lab_over = create_artificial_label ();
3332
3333 examine_argument (TYPE_MODE (type), type, 0,
3334 &needed_intregs, &needed_sseregs);
3335
3336 need_temp = (!REG_P (container)
3337 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3338 || TYPE_ALIGN (type) > 128));
3339
3340 /* In case we are passing a structure, verify that it is a consecutive block
3341 in the register save area. If not, we need to do moves. */
3342 if (!need_temp && !REG_P (container))
3343 {
3344 /* Verify that all registers are strictly consecutive */
3345 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3346 {
3347 int i;
3348
3349 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3350 {
3351 rtx slot = XVECEXP (container, 0, i);
3352 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3353 || INTVAL (XEXP (slot, 1)) != i * 16)
3354 need_temp = 1;
3355 }
3356 }
3357 else
3358 {
3359 int i;
3360
3361 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3362 {
3363 rtx slot = XVECEXP (container, 0, i);
3364 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3365 || INTVAL (XEXP (slot, 1)) != i * 8)
3366 need_temp = 1;
3367 }
3368 }
3369 }
3370 if (!need_temp)
3371 {
3372 int_addr = addr;
3373 sse_addr = addr;
3374 }
3375 else
3376 {
3377 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3378 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3379 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3380 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3381 }
3382 /* First ensure that we fit completely in registers. */
3383 if (needed_intregs)
3384 {
3385 t = build_int_cst (TREE_TYPE (gpr),
3386 (REGPARM_MAX - needed_intregs + 1) * 8);
3387 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3388 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3389 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3390 gimplify_and_add (t, pre_p);
3391 }
3392 if (needed_sseregs)
3393 {
3394 t = build_int_cst (TREE_TYPE (fpr),
3395 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3396 + REGPARM_MAX * 8);
3397 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3398 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3399 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3400 gimplify_and_add (t, pre_p);
3401 }
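/* Editorial example of the two checks above, assuming REGPARM_MAX == 6 and
   SSE_REGPARM_MAX == 8: for a type needing 2 integer registers, the branch
   to lab_false is taken once gpr >= (6 - 2 + 1) * 8 = 40, i.e. when fewer
   than 2 of the 6 slots remain free; the SSE check works the same way in
   16-byte units, offset by the 48-byte integer register area. */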
3402
3403 /* Compute index to start of area used for integer regs. */
3404 if (needed_intregs)
3405 {
3406 /* int_addr = gpr + sav; */
3407 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3408 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3409 gimplify_and_add (t, pre_p);
3410 }
3411 if (needed_sseregs)
3412 {
3413 /* sse_addr = fpr + sav; */
3414 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3415 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3416 gimplify_and_add (t, pre_p);
3417 }
3418 if (need_temp)
3419 {
3420 int i;
3421 tree temp = create_tmp_var (type, "va_arg_tmp");
3422
3423 /* addr = &temp; */
3424 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3425 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3426 gimplify_and_add (t, pre_p);
3427
3428 for (i = 0; i < XVECLEN (container, 0); i++)
3429 {
3430 rtx slot = XVECEXP (container, 0, i);
3431 rtx reg = XEXP (slot, 0);
3432 enum machine_mode mode = GET_MODE (reg);
3433 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3434 tree addr_type = build_pointer_type (piece_type);
3435 tree src_addr, src;
3436 int src_offset;
3437 tree dest_addr, dest;
3438
3439 if (SSE_REGNO_P (REGNO (reg)))
3440 {
3441 src_addr = sse_addr;
3442 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3443 }
3444 else
3445 {
3446 src_addr = int_addr;
3447 src_offset = REGNO (reg) * 8;
3448 }
3449 src_addr = fold_convert (addr_type, src_addr);
3450 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3451 size_int (src_offset)));
3452 src = build_va_arg_indirect_ref (src_addr);
3453
3454 dest_addr = fold_convert (addr_type, addr);
3455 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3456 size_int (INTVAL (XEXP (slot, 1)))));
3457 dest = build_va_arg_indirect_ref (dest_addr);
3458
3459 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3460 gimplify_and_add (t, pre_p);
3461 }
3462 }
3463
3464 if (needed_intregs)
3465 {
3466 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3467 build_int_cst (NULL_TREE, needed_intregs * 8));
3468 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3469 gimplify_and_add (t, pre_p);
3470 }
3471 if (needed_sseregs)
3472 {
3473 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3474 build_int_cst (NULL_TREE, needed_sseregs * 16));
3475 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3476 gimplify_and_add (t, pre_p);
3477 }
3478
3479 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3480 gimplify_and_add (t, pre_p);
3481
3482 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3483 append_to_statement_list (t, pre_p);
3484 }
3485
3486 /* ... otherwise out of the overflow area. */
3487
3488 /* Care for on-stack alignment if needed. */
3489 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3490 t = ovf;
3491 else
3492 {
3493 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3494 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3495 build_int_cst (NULL_TREE, align - 1));
3496 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3497 build_int_cst (NULL_TREE, -align));
3498 }
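/* Editorial arithmetic check: for a 16-byte-aligned type, align is 16, so
   the two statements above compute t = (ovf + 15) & -16, the usual
   round-up-to-a-multiple-of-16 of the overflow-area pointer. */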
3499 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3500
3501 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3502 gimplify_and_add (t2, pre_p);
3503
3504 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3505 build_int_cst (NULL_TREE, rsize * UNITS_PER_WORD));
3506 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3507 gimplify_and_add (t, pre_p);
3508
3509 if (container)
3510 {
3511 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3512 append_to_statement_list (t, pre_p);
3513 }
3514
3515 ptrtype = build_pointer_type (type);
3516 addr = fold_convert (ptrtype, addr);
3517
3518 if (indirect_p)
3519 addr = build_va_arg_indirect_ref (addr);
3520 return build_va_arg_indirect_ref (addr);
3521 }
3522 \f
3523 /* Return nonzero if OPNUM's MEM should be matched
3524 in movabs* patterns. */
3525
3526 int
3527 ix86_check_movabs (rtx insn, int opnum)
3528 {
3529 rtx set, mem;
3530
3531 set = PATTERN (insn);
3532 if (GET_CODE (set) == PARALLEL)
3533 set = XVECEXP (set, 0, 0);
3534 if (GET_CODE (set) != SET)
3535 abort ();
3536 mem = XEXP (set, opnum);
3537 while (GET_CODE (mem) == SUBREG)
3538 mem = SUBREG_REG (mem);
3539 if (GET_CODE (mem) != MEM)
3540 abort ();
3541 return (volatile_ok || !MEM_VOLATILE_P (mem));
3542 }
3543 \f
3544 /* Initialize the table of extra 80387 mathematical constants. */
3545
3546 static void
3547 init_ext_80387_constants (void)
3548 {
3549 static const char * cst[5] =
3550 {
3551 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3552 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3553 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3554 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3555 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3556 };
3557 int i;
3558
3559 for (i = 0; i < 5; i++)
3560 {
3561 real_from_string (&ext_80387_constants_table[i], cst[i]);
3562 /* Ensure each constant is rounded to XFmode precision. */
3563 real_convert (&ext_80387_constants_table[i],
3564 XFmode, &ext_80387_constants_table[i]);
3565 }
3566
3567 ext_80387_constants_init = 1;
3568 }
3569
3570 /* Return true if the constant is something that can be loaded with
3571 a special instruction. */
3572
3573 int
3574 standard_80387_constant_p (rtx x)
3575 {
3576 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3577 return -1;
3578
3579 if (x == CONST0_RTX (GET_MODE (x)))
3580 return 1;
3581 if (x == CONST1_RTX (GET_MODE (x)))
3582 return 2;
3583
3584 /* For XFmode constants, try to find a special 80387 instruction when
3585 optimizing for size or on those CPUs that benefit from them. */
3586 if (GET_MODE (x) == XFmode
3587 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3588 {
3589 REAL_VALUE_TYPE r;
3590 int i;
3591
3592 if (! ext_80387_constants_init)
3593 init_ext_80387_constants ();
3594
3595 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3596 for (i = 0; i < 5; i++)
3597 if (real_identical (&r, &ext_80387_constants_table[i]))
3598 return i + 3;
3599 }
3600
3601 return 0;
3602 }
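/* Editorial summary of the return-value encoding shared by
   standard_80387_constant_p above and standard_80387_constant_opcode below:
   1 means +0.0 (fldz), 2 means 1.0 (fld1), and 3..7 select the XFmode
   constants log10(2), ln(2), log2(e), log2(10) and pi (fldlg2, fldln2,
   fldl2e, fldl2t, fldpi); 0 means "no special instruction" and -1 means
   the operand is not a floating-point CONST_DOUBLE at all. */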
3603
3604 /* Return the opcode of the special instruction to be used to load
3605 the constant X. */
3606
3607 const char *
3608 standard_80387_constant_opcode (rtx x)
3609 {
3610 switch (standard_80387_constant_p (x))
3611 {
3612 case 1:
3613 return "fldz";
3614 case 2:
3615 return "fld1";
3616 case 3:
3617 return "fldlg2";
3618 case 4:
3619 return "fldln2";
3620 case 5:
3621 return "fldl2e";
3622 case 6:
3623 return "fldl2t";
3624 case 7:
3625 return "fldpi";
3626 }
3627 abort ();
3628 }
3629
3630 /* Return the CONST_DOUBLE representing the 80387 constant that is
3631 loaded by the specified special instruction. The argument IDX
3632 matches the return value from standard_80387_constant_p. */
3633
3634 rtx
3635 standard_80387_constant_rtx (int idx)
3636 {
3637 int i;
3638
3639 if (! ext_80387_constants_init)
3640 init_ext_80387_constants ();
3641
3642 switch (idx)
3643 {
3644 case 3:
3645 case 4:
3646 case 5:
3647 case 6:
3648 case 7:
3649 i = idx - 3;
3650 break;
3651
3652 default:
3653 abort ();
3654 }
3655
3656 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3657 XFmode);
3658 }
3659
3660 /* Return 1 if X is an FP constant we can load into an SSE register
3661 without using memory. */
3662 int
3663 standard_sse_constant_p (rtx x)
3664 {
3665 if (x == const0_rtx)
3666 return 1;
3667 return (x == CONST0_RTX (GET_MODE (x)));
3668 }
3669
3670 /* Returns 1 if OP contains a symbol reference */
3671
3672 int
3673 symbolic_reference_mentioned_p (rtx op)
3674 {
3675 const char *fmt;
3676 int i;
3677
3678 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3679 return 1;
3680
3681 fmt = GET_RTX_FORMAT (GET_CODE (op));
3682 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3683 {
3684 if (fmt[i] == 'E')
3685 {
3686 int j;
3687
3688 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3689 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3690 return 1;
3691 }
3692
3693 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3694 return 1;
3695 }
3696
3697 return 0;
3698 }
3699
3700 /* Return 1 if it is appropriate to emit `ret' instructions in the
3701 body of a function. Do this only if the epilogue is simple, needing a
3702 couple of insns. Prior to reloading, we can't tell how many registers
3703 must be saved, so return 0 then. Return 0 if there is no frame
3704 marker to de-allocate.
3705
3706 If NON_SAVING_SETJMP is defined and true, then it is not possible
3707 for the epilogue to be simple, so return 0. This is a special case
3708 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3709 until final, but jump_optimize may need to know sooner if a
3710 `return' is OK. */
3711
3712 int
3713 ix86_can_use_return_insn_p (void)
3714 {
3715 struct ix86_frame frame;
3716
3717 #ifdef NON_SAVING_SETJMP
3718 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3719 return 0;
3720 #endif
3721
3722 if (! reload_completed || frame_pointer_needed)
3723 return 0;
3724
3725 /* Don't allow popping more than 32K bytes of arguments, since that's
3726 all we can do with one instruction. */
3727 if (current_function_pops_args
3728 && current_function_args_size >= 32768)
3729 return 0;
3730
3731 ix86_compute_frame_layout (&frame);
3732 return frame.to_allocate == 0 && frame.nregs == 0;
3733 }
3734 \f
3735 /* Value should be nonzero if functions must have frame pointers.
3736 Zero means the frame pointer need not be set up (and parms may
3737 be accessed via the stack pointer) in functions that seem suitable. */
3738
3739 int
3740 ix86_frame_pointer_required (void)
3741 {
3742 /* If we accessed previous frames, then the generated code expects
3743 to be able to access the saved ebp value in our frame. */
3744 if (cfun->machine->accesses_prev_frame)
3745 return 1;
3746
3747 /* Several x86 OSes need a frame pointer for other reasons,
3748 usually pertaining to setjmp. */
3749 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3750 return 1;
3751
3752 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3753 the frame pointer by default. Turn it back on now if we've not
3754 got a leaf function. */
3755 if (TARGET_OMIT_LEAF_FRAME_POINTER
3756 && (!current_function_is_leaf))
3757 return 1;
3758
3759 if (current_function_profile)
3760 return 1;
3761
3762 return 0;
3763 }
3764
3765 /* Record that the current function accesses previous call frames. */
3766
3767 void
3768 ix86_setup_frame_addresses (void)
3769 {
3770 cfun->machine->accesses_prev_frame = 1;
3771 }
3772 \f
3773 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3774 # define USE_HIDDEN_LINKONCE 1
3775 #else
3776 # define USE_HIDDEN_LINKONCE 0
3777 #endif
3778
3779 static int pic_labels_used;
3780
3781 /* Fills in the label name that should be used for a pc thunk for
3782 the given register. */
3783
3784 static void
3785 get_pc_thunk_name (char name[32], unsigned int regno)
3786 {
3787 if (USE_HIDDEN_LINKONCE)
3788 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3789 else
3790 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3791 }
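/* Editorial example: with HAVE_GAS_HIDDEN and one-only support, the thunk
   that loads %ebx would be named "__i686.get_pc_thunk.bx" (reg_names[] on
   ia32 spells the registers without the 'e' prefix); otherwise an internal
   label built from the prefix "LPR" and the register number is used. */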
3792
3793
3794 /* This function generates the -fpic pc thunks: each one loads its register
3795 with the return address of the caller and then returns. */
3796
3797 void
3798 ix86_file_end (void)
3799 {
3800 rtx xops[2];
3801 int regno;
3802
3803 for (regno = 0; regno < 8; ++regno)
3804 {
3805 char name[32];
3806
3807 if (! ((pic_labels_used >> regno) & 1))
3808 continue;
3809
3810 get_pc_thunk_name (name, regno);
3811
3812 if (USE_HIDDEN_LINKONCE)
3813 {
3814 tree decl;
3815
3816 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3817 error_mark_node);
3818 TREE_PUBLIC (decl) = 1;
3819 TREE_STATIC (decl) = 1;
3820 DECL_ONE_ONLY (decl) = 1;
3821
3822 (*targetm.asm_out.unique_section) (decl, 0);
3823 named_section (decl, NULL, 0);
3824
3825 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3826 fputs ("\t.hidden\t", asm_out_file);
3827 assemble_name (asm_out_file, name);
3828 fputc ('\n', asm_out_file);
3829 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3830 }
3831 else
3832 {
3833 text_section ();
3834 ASM_OUTPUT_LABEL (asm_out_file, name);
3835 }
3836
3837 xops[0] = gen_rtx_REG (SImode, regno);
3838 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3839 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3840 output_asm_insn ("ret", xops);
3841 }
3842
3843 if (NEED_INDICATE_EXEC_STACK)
3844 file_end_indicate_exec_stack ();
3845 }
3846
3847 /* Emit code for the SET_GOT patterns. */
3848
3849 const char *
3850 output_set_got (rtx dest)
3851 {
3852 rtx xops[3];
3853
3854 xops[0] = dest;
3855 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3856
3857 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3858 {
3859 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3860
3861 if (!flag_pic)
3862 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3863 else
3864 output_asm_insn ("call\t%a2", xops);
3865
3866 #if TARGET_MACHO
3867 /* Output the "canonical" label name ("Lxx$pb") here too. This
3868 is what will be referred to by the Mach-O PIC subsystem. */
3869 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3870 #endif
3871 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3872 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3873
3874 if (flag_pic)
3875 output_asm_insn ("pop{l}\t%0", xops);
3876 }
3877 else
3878 {
3879 char name[32];
3880 get_pc_thunk_name (name, REGNO (dest));
3881 pic_labels_used |= 1 << REGNO (dest);
3882
3883 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3884 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3885 output_asm_insn ("call\t%X2", xops);
3886 }
3887
3888 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3889 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3890 else if (!TARGET_MACHO)
3891 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3892
3893 return "";
3894 }
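/* Editorial sketch of the sequences produced above for ia32 PIC code
   (label names are illustrative, not the ones the compiler numbers).
   Without deep-branch-prediction tuning the GOT pointer is set up inline,
   typically into %ebx:

       call  .L2
   .L2: popl  %ebx
       addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION the call/pop pair is instead a call
   to the __i686.get_pc_thunk thunk for the destination register, followed
   by an add of $_GLOBAL_OFFSET_TABLE_ without the label adjustment. */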
3895
3896 /* Generate a "push" pattern for input ARG. */
3897
3898 static rtx
3899 gen_push (rtx arg)
3900 {
3901 return gen_rtx_SET (VOIDmode,
3902 gen_rtx_MEM (Pmode,
3903 gen_rtx_PRE_DEC (Pmode,
3904 stack_pointer_rtx)),
3905 arg);
3906 }
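/* Editorial note: schematically the RTL built above is

       (set (mem (pre_dec (reg sp))) ARG)

   i.e. the canonical form that the push patterns in i386.md match. */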
3907
3908 /* Return >= 0 if there is an unused call-clobbered register available
3909 for the entire function. */
3910
3911 static unsigned int
3912 ix86_select_alt_pic_regnum (void)
3913 {
3914 if (current_function_is_leaf && !current_function_profile)
3915 {
3916 int i;
3917 for (i = 2; i >= 0; --i)
3918 if (!regs_ever_live[i])
3919 return i;
3920 }
3921
3922 return INVALID_REGNUM;
3923 }
3924
3925 /* Return 1 if we need to save REGNO. */
3926 static int
3927 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3928 {
3929 if (pic_offset_table_rtx
3930 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3931 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3932 || current_function_profile
3933 || current_function_calls_eh_return
3934 || current_function_uses_const_pool))
3935 {
3936 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3937 return 0;
3938 return 1;
3939 }
3940
3941 if (current_function_calls_eh_return && maybe_eh_return)
3942 {
3943 unsigned i;
3944 for (i = 0; ; i++)
3945 {
3946 unsigned test = EH_RETURN_DATA_REGNO (i);
3947 if (test == INVALID_REGNUM)
3948 break;
3949 if (test == regno)
3950 return 1;
3951 }
3952 }
3953
3954 return (regs_ever_live[regno]
3955 && !call_used_regs[regno]
3956 && !fixed_regs[regno]
3957 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3958 }
3959
3960 /* Return number of registers to be saved on the stack. */
3961
3962 static int
3963 ix86_nsaved_regs (void)
3964 {
3965 int nregs = 0;
3966 int regno;
3967
3968 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3969 if (ix86_save_reg (regno, true))
3970 nregs++;
3971 return nregs;
3972 }
3973
3974 /* Return the offset between two registers, one to be eliminated, and the other
3975 its replacement, at the start of a routine. */
3976
3977 HOST_WIDE_INT
3978 ix86_initial_elimination_offset (int from, int to)
3979 {
3980 struct ix86_frame frame;
3981 ix86_compute_frame_layout (&frame);
3982
3983 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3984 return frame.hard_frame_pointer_offset;
3985 else if (from == FRAME_POINTER_REGNUM
3986 && to == HARD_FRAME_POINTER_REGNUM)
3987 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3988 else
3989 {
3990 if (to != STACK_POINTER_REGNUM)
3991 abort ();
3992 else if (from == ARG_POINTER_REGNUM)
3993 return frame.stack_pointer_offset;
3994 else if (from != FRAME_POINTER_REGNUM)
3995 abort ();
3996 else
3997 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3998 }
3999 }
4000
4001 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
4002
4003 static void
4004 ix86_compute_frame_layout (struct ix86_frame *frame)
4005 {
4006 HOST_WIDE_INT total_size;
4007 unsigned int stack_alignment_needed;
4008 HOST_WIDE_INT offset;
4009 unsigned int preferred_alignment;
4010 HOST_WIDE_INT size = get_frame_size ();
4011
4012 frame->nregs = ix86_nsaved_regs ();
4013 total_size = size;
4014
4015 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4016 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4017
4018 /* During reload iterations the number of registers saved can change.
4019 Recompute the value as needed. Do not recompute when the number of
4020 registers didn't change, as reload makes multiple calls to this function
4021 and does not expect the decision to change within a single iteration. */
4022 if (!optimize_size
4023 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4024 {
4025 int count = frame->nregs;
4026
4027 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4028 /* The fast prologue uses move instead of push to save registers. This
4029 is significantly longer, but also executes faster as modern hardware
4030 can execute the moves in parallel, but can't do that for push/pop.
4031
4032 Be careful about choosing which prologue to emit: when the function takes
4033 many instructions to execute, we may as well use the slow version, and
4034 likewise when the function is known to be outside a hot spot (this is
4035 known only with profile feedback). Weight the size of the function by the
4036 number of registers to save, as it is cheap to use one or two push
4037 instructions but very slow to use many of them. */
4038 if (count)
4039 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4040 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4041 || (flag_branch_probabilities
4042 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4043 cfun->machine->use_fast_prologue_epilogue = false;
4044 else
4045 cfun->machine->use_fast_prologue_epilogue
4046 = !expensive_function_p (count);
4047 }
4048 if (TARGET_PROLOGUE_USING_MOVE
4049 && cfun->machine->use_fast_prologue_epilogue)
4050 frame->save_regs_using_mov = true;
4051 else
4052 frame->save_regs_using_mov = false;
4053
4054
4055 /* Skip return address and saved base pointer. */
4056 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4057
4058 frame->hard_frame_pointer_offset = offset;
4059
4060 /* Do some sanity checking of stack_alignment_needed and
4061 preferred_alignment, since the i386 port is the only one using these
4062 features and they may break easily. */
4063
4064 if (size && !stack_alignment_needed)
4065 abort ();
4066 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4067 abort ();
4068 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4069 abort ();
4070 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4071 abort ();
4072
4073 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4074 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4075
4076 /* Register save area */
4077 offset += frame->nregs * UNITS_PER_WORD;
4078
4079 /* Va-arg area */
4080 if (ix86_save_varrargs_registers)
4081 {
4082 offset += X86_64_VARARGS_SIZE;
4083 frame->va_arg_size = X86_64_VARARGS_SIZE;
4084 }
4085 else
4086 frame->va_arg_size = 0;
4087
4088 /* Align start of frame for local function. */
4089 frame->padding1 = ((offset + stack_alignment_needed - 1)
4090 & -stack_alignment_needed) - offset;
4091
4092 offset += frame->padding1;
4093
4094 /* Frame pointer points here. */
4095 frame->frame_pointer_offset = offset;
4096
4097 offset += size;
4098
4099 /* Add the outgoing arguments area. It can be skipped if we eliminated
4100 all the function calls as dead code.
4101 Skipping is, however, impossible when the function calls alloca: the
4102 alloca expander assumes that the last current_function_outgoing_args_size
4103 bytes of the stack frame are unused. */
4104 if (ACCUMULATE_OUTGOING_ARGS
4105 && (!current_function_is_leaf || current_function_calls_alloca))
4106 {
4107 offset += current_function_outgoing_args_size;
4108 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4109 }
4110 else
4111 frame->outgoing_arguments_size = 0;
4112
4113 /* Align stack boundary. Only needed if we're calling another function
4114 or using alloca. */
4115 if (!current_function_is_leaf || current_function_calls_alloca)
4116 frame->padding2 = ((offset + preferred_alignment - 1)
4117 & -preferred_alignment) - offset;
4118 else
4119 frame->padding2 = 0;
4120
4121 offset += frame->padding2;
4122
4123 /* We've reached end of stack frame. */
4124 frame->stack_pointer_offset = offset;
4125
4126 /* Size prologue needs to allocate. */
4127 frame->to_allocate =
4128 (size + frame->padding1 + frame->padding2
4129 + frame->outgoing_arguments_size + frame->va_arg_size);
4130
4131 if ((!frame->to_allocate && frame->nregs <= 1)
4132 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4133 frame->save_regs_using_mov = false;
4134
4135 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4136 && current_function_is_leaf)
4137 {
4138 frame->red_zone_size = frame->to_allocate;
4139 if (frame->save_regs_using_mov)
4140 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4141 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4142 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4143 }
4144 else
4145 frame->red_zone_size = 0;
4146 frame->to_allocate -= frame->red_zone_size;
4147 frame->stack_pointer_offset -= frame->red_zone_size;
4148 #if 0
4149 fprintf (stderr, "nregs: %i\n", frame->nregs);
4150 fprintf (stderr, "size: %i\n", size);
4151 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4152 fprintf (stderr, "padding1: %i\n", frame->padding1);
4153 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4154 fprintf (stderr, "padding2: %i\n", frame->padding2);
4155 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4156 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4157 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4158 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4159 frame->hard_frame_pointer_offset);
4160 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4161 #endif
4162 }
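/* Editorial sketch of the layout this function computes, reading downward
   from just above the incoming return address (the offsets named on the
   right are the fields filled in above, measured from that same origin):

       return address
       saved hard frame pointer (if frame_pointer_needed)  <- hard_frame_pointer_offset
       saved general registers (nregs words)
       va_arg register save area (if used)
       padding1 (frame alignment)
                                                            <- frame_pointer_offset
       local variables (get_frame_size () bytes)
       outgoing argument area
       padding2 (stack boundary alignment)
                                                            <- stack_pointer_offset

   When the red zone applies, to_allocate and stack_pointer_offset are
   reduced by the red zone size at the end of the function. */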
4163
4164 /* Emit code to save registers in the prologue. */
4165
4166 static void
4167 ix86_emit_save_regs (void)
4168 {
4169 int regno;
4170 rtx insn;
4171
4172 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4173 if (ix86_save_reg (regno, true))
4174 {
4175 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4176 RTX_FRAME_RELATED_P (insn) = 1;
4177 }
4178 }
4179
4180 /* Emit code to save registers using MOV insns. First register
4181 is stored at POINTER + OFFSET. */
4182 static void
4183 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4184 {
4185 int regno;
4186 rtx insn;
4187
4188 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4189 if (ix86_save_reg (regno, true))
4190 {
4191 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4192 Pmode, offset),
4193 gen_rtx_REG (Pmode, regno));
4194 RTX_FRAME_RELATED_P (insn) = 1;
4195 offset += UNITS_PER_WORD;
4196 }
4197 }
4198
4199 /* Expand prologue or epilogue stack adjustment.
4200 The pattern exists to put a dependency on all ebp-based memory accesses.
4201 STYLE should be negative if instructions should be marked as frame related,
4202 zero if %r11 register is live and cannot be freely used and positive
4203 otherwise. */
4204
4205 static void
4206 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4207 {
4208 rtx insn;
4209
4210 if (! TARGET_64BIT)
4211 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4212 else if (x86_64_immediate_operand (offset, DImode))
4213 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4214 else
4215 {
4216 rtx r11;
4217 /* r11 is used by indirect sibcall return as well, set before the
4218 epilogue and used after the epilogue. ATM indirect sibcall
4219 shouldn't be used together with huge frame sizes in one
4220 function because of the frame_size check in sibcall.c. */
4221 if (style == 0)
4222 abort ();
4223 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4224 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4225 if (style < 0)
4226 RTX_FRAME_RELATED_P (insn) = 1;
4227 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4228 offset));
4229 }
4230 if (style < 0)
4231 RTX_FRAME_RELATED_P (insn) = 1;
4232 }
4233
4234 /* Expand the prologue into a bunch of separate insns. */
4235
4236 void
4237 ix86_expand_prologue (void)
4238 {
4239 rtx insn;
4240 bool pic_reg_used;
4241 struct ix86_frame frame;
4242 HOST_WIDE_INT allocate;
4243
4244 ix86_compute_frame_layout (&frame);
4245
4246 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4247 slower on all targets. Also sdb doesn't like it. */
4248
4249 if (frame_pointer_needed)
4250 {
4251 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4252 RTX_FRAME_RELATED_P (insn) = 1;
4253
4254 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4255 RTX_FRAME_RELATED_P (insn) = 1;
4256 }
4257
4258 allocate = frame.to_allocate;
4259
4260 if (!frame.save_regs_using_mov)
4261 ix86_emit_save_regs ();
4262 else
4263 allocate += frame.nregs * UNITS_PER_WORD;
4264
4265 /* When using the red zone we may start register saving before allocating
4266 the stack frame, saving one cycle of the prologue. */
4267 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4268 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4269 : stack_pointer_rtx,
4270 -frame.nregs * UNITS_PER_WORD);
4271
4272 if (allocate == 0)
4273 ;
4274 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4275 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4276 GEN_INT (-allocate), -1);
4277 else
4278 {
4279 /* Only valid for Win32. */
4280 rtx eax = gen_rtx_REG (SImode, 0);
4281 bool eax_live = ix86_eax_live_at_start_p ();
4282
4283 if (TARGET_64BIT)
4284 abort ();
4285
4286 if (eax_live)
4287 {
4288 emit_insn (gen_push (eax));
4289 allocate -= 4;
4290 }
4291
4292 insn = emit_move_insn (eax, GEN_INT (allocate));
4293 RTX_FRAME_RELATED_P (insn) = 1;
4294
4295 insn = emit_insn (gen_allocate_stack_worker (eax));
4296 RTX_FRAME_RELATED_P (insn) = 1;
4297
4298 if (eax_live)
4299 {
4300 rtx t;
4301 if (frame_pointer_needed)
4302 t = plus_constant (hard_frame_pointer_rtx,
4303 allocate
4304 - frame.to_allocate
4305 - frame.nregs * UNITS_PER_WORD);
4306 else
4307 t = plus_constant (stack_pointer_rtx, allocate);
4308 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4309 }
4310 }
4311
4312 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4313 {
4314 if (!frame_pointer_needed || !frame.to_allocate)
4315 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4316 else
4317 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4318 -frame.nregs * UNITS_PER_WORD);
4319 }
4320
4321 pic_reg_used = false;
4322 if (pic_offset_table_rtx
4323 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4324 || current_function_profile))
4325 {
4326 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4327
4328 if (alt_pic_reg_used != INVALID_REGNUM)
4329 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4330
4331 pic_reg_used = true;
4332 }
4333
4334 if (pic_reg_used)
4335 {
4336 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4337
4338 /* Even with accurate pre-reload life analysis, we can wind up
4339 deleting all references to the pic register after reload.
4340 Consider if cross-jumping unifies two sides of a branch
4341 controlled by a comparison vs the only read from a global.
4342 In which case, allow the set_got to be deleted, though we're
4343 too late to do anything about the ebx save in the prologue. */
4344 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4345 }
4346
4347 /* Prevent function calls from being scheduled before the call to mcount.
4348 In the pic_reg_used case, make sure that the got load isn't deleted. */
4349 if (current_function_profile)
4350 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4351 }
4352
4353 /* Emit code to restore saved registers using MOV insns. First register
4354 is restored from POINTER + OFFSET. */
4355 static void
4356 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4357 int maybe_eh_return)
4358 {
4359 int regno;
4360 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4361
4362 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4363 if (ix86_save_reg (regno, maybe_eh_return))
4364 {
4365 /* Ensure that adjust_address won't be forced to produce pointer
4366 out of range allowed by x86-64 instruction set. */
4367 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4368 {
4369 rtx r11;
4370
4371 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4372 emit_move_insn (r11, GEN_INT (offset));
4373 emit_insn (gen_adddi3 (r11, r11, pointer));
4374 base_address = gen_rtx_MEM (Pmode, r11);
4375 offset = 0;
4376 }
4377 emit_move_insn (gen_rtx_REG (Pmode, regno),
4378 adjust_address (base_address, Pmode, offset));
4379 offset += UNITS_PER_WORD;
4380 }
4381 }
4382
4383 /* Restore function stack, frame, and registers. */
4384
4385 void
4386 ix86_expand_epilogue (int style)
4387 {
4388 int regno;
4389 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4390 struct ix86_frame frame;
4391 HOST_WIDE_INT offset;
4392
4393 ix86_compute_frame_layout (&frame);
4394
4395 /* Calculate start of saved registers relative to ebp. Special care
4396 must be taken for the normal return case of a function using
4397 eh_return: the eax and edx registers are marked as saved, but not
4398 restored along this path. */
4399 offset = frame.nregs;
4400 if (current_function_calls_eh_return && style != 2)
4401 offset -= 2;
4402 offset *= -UNITS_PER_WORD;
4403
4404 /* If we're only restoring one register and sp is not valid then
4405 it is cheaper to use a move instruction to restore the register, since
4406 that is less work than reloading sp and popping the register.
4407
4408 The default code results in a stack adjustment using an add/lea
4409 instruction, while this code results in a LEAVE instruction (or discrete
4410 equivalent), so it is profitable in some other cases as well, especially
4411 when there are no registers to restore. We also use this code when
4412 TARGET_USE_LEAVE is set and there is exactly one register to pop. This
4413 heuristic may need some tuning in the future. */
4414 if ((!sp_valid && frame.nregs <= 1)
4415 || (TARGET_EPILOGUE_USING_MOVE
4416 && cfun->machine->use_fast_prologue_epilogue
4417 && (frame.nregs > 1 || frame.to_allocate))
4418 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4419 || (frame_pointer_needed && TARGET_USE_LEAVE
4420 && cfun->machine->use_fast_prologue_epilogue
4421 && frame.nregs == 1)
4422 || current_function_calls_eh_return)
4423 {
4424 /* Restore registers. We can use ebp or esp to address the memory
4425 locations. If both are available, default to ebp, since offsets
4426 are known to be small. The only exception is when esp points directly
4427 to the end of the block of saved registers, where we may simplify the
4428 addressing mode. */
4429
4430 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4431 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4432 frame.to_allocate, style == 2);
4433 else
4434 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4435 offset, style == 2);
4436
4437 /* eh_return epilogues need %ecx added to the stack pointer. */
4438 if (style == 2)
4439 {
4440 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4441
4442 if (frame_pointer_needed)
4443 {
4444 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4445 tmp = plus_constant (tmp, UNITS_PER_WORD);
4446 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4447
4448 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4449 emit_move_insn (hard_frame_pointer_rtx, tmp);
4450
4451 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4452 const0_rtx, style);
4453 }
4454 else
4455 {
4456 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4457 tmp = plus_constant (tmp, (frame.to_allocate
4458 + frame.nregs * UNITS_PER_WORD));
4459 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4460 }
4461 }
4462 else if (!frame_pointer_needed)
4463 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4464 GEN_INT (frame.to_allocate
4465 + frame.nregs * UNITS_PER_WORD),
4466 style);
4467 /* If not an i386, mov & pop is faster than "leave". */
4468 else if (TARGET_USE_LEAVE || optimize_size
4469 || !cfun->machine->use_fast_prologue_epilogue)
4470 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4471 else
4472 {
4473 pro_epilogue_adjust_stack (stack_pointer_rtx,
4474 hard_frame_pointer_rtx,
4475 const0_rtx, style);
4476 if (TARGET_64BIT)
4477 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4478 else
4479 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4480 }
4481 }
4482 else
4483 {
4484 /* First step is to deallocate the stack frame so that we can
4485 pop the registers. */
4486 if (!sp_valid)
4487 {
4488 if (!frame_pointer_needed)
4489 abort ();
4490 pro_epilogue_adjust_stack (stack_pointer_rtx,
4491 hard_frame_pointer_rtx,
4492 GEN_INT (offset), style);
4493 }
4494 else if (frame.to_allocate)
4495 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4496 GEN_INT (frame.to_allocate), style);
4497
4498 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4499 if (ix86_save_reg (regno, false))
4500 {
4501 if (TARGET_64BIT)
4502 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4503 else
4504 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4505 }
4506 if (frame_pointer_needed)
4507 {
4508 /* Leave results in shorter dependency chains on CPUs that are
4509 able to grok it fast. */
4510 if (TARGET_USE_LEAVE)
4511 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4512 else if (TARGET_64BIT)
4513 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4514 else
4515 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4516 }
4517 }
4518
4519 /* Sibcall epilogues don't want a return instruction. */
4520 if (style == 0)
4521 return;
4522
4523 if (current_function_pops_args && current_function_args_size)
4524 {
4525 rtx popc = GEN_INT (current_function_pops_args);
4526
4527 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4528 return address, do an explicit add, and jump indirectly to the
4529 caller. */
4530
4531 if (current_function_pops_args >= 65536)
4532 {
4533 rtx ecx = gen_rtx_REG (SImode, 2);
4534
4535 /* There is no "pascal" calling convention in 64bit ABI. */
4536 if (TARGET_64BIT)
4537 abort ();
4538
4539 emit_insn (gen_popsi1 (ecx));
4540 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4541 emit_jump_insn (gen_return_indirect_internal (ecx));
4542 }
4543 else
4544 emit_jump_insn (gen_return_pop_internal (popc));
4545 }
4546 else
4547 emit_jump_insn (gen_return_internal ());
4548 }
4549
4550 /* Reset from the function's potential modifications. */
4551
4552 static void
4553 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4554 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4555 {
4556 if (pic_offset_table_rtx)
4557 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4558 }
4559 \f
4560 /* Extract the parts of an RTL expression that is a valid memory address
4561 for an instruction. Return 0 if the structure of the address is
4562 grossly off. Return -1 if the address contains ASHIFT, so it is not
4563 strictly valid, but still used for computing length of lea instruction. */
4564
4565 int
4566 ix86_decompose_address (rtx addr, struct ix86_address *out)
4567 {
4568 rtx base = NULL_RTX;
4569 rtx index = NULL_RTX;
4570 rtx disp = NULL_RTX;
4571 HOST_WIDE_INT scale = 1;
4572 rtx scale_rtx = NULL_RTX;
4573 int retval = 1;
4574 enum ix86_address_seg seg = SEG_DEFAULT;
4575
4576 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4577 base = addr;
4578 else if (GET_CODE (addr) == PLUS)
4579 {
4580 rtx addends[4], op;
4581 int n = 0, i;
4582
4583 op = addr;
4584 do
4585 {
4586 if (n >= 4)
4587 return 0;
4588 addends[n++] = XEXP (op, 1);
4589 op = XEXP (op, 0);
4590 }
4591 while (GET_CODE (op) == PLUS);
4592 if (n >= 4)
4593 return 0;
4594 addends[n] = op;
4595
4596 for (i = n; i >= 0; --i)
4597 {
4598 op = addends[i];
4599 switch (GET_CODE (op))
4600 {
4601 case MULT:
4602 if (index)
4603 return 0;
4604 index = XEXP (op, 0);
4605 scale_rtx = XEXP (op, 1);
4606 break;
4607
4608 case UNSPEC:
4609 if (XINT (op, 1) == UNSPEC_TP
4610 && TARGET_TLS_DIRECT_SEG_REFS
4611 && seg == SEG_DEFAULT)
4612 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4613 else
4614 return 0;
4615 break;
4616
4617 case REG:
4618 case SUBREG:
4619 if (!base)
4620 base = op;
4621 else if (!index)
4622 index = op;
4623 else
4624 return 0;
4625 break;
4626
4627 case CONST:
4628 case CONST_INT:
4629 case SYMBOL_REF:
4630 case LABEL_REF:
4631 if (disp)
4632 return 0;
4633 disp = op;
4634 break;
4635
4636 default:
4637 return 0;
4638 }
4639 }
4640 }
4641 else if (GET_CODE (addr) == MULT)
4642 {
4643 index = XEXP (addr, 0); /* index*scale */
4644 scale_rtx = XEXP (addr, 1);
4645 }
4646 else if (GET_CODE (addr) == ASHIFT)
4647 {
4648 rtx tmp;
4649
4650 /* We're called for lea too, which implements ashift on occasion. */
4651 index = XEXP (addr, 0);
4652 tmp = XEXP (addr, 1);
4653 if (GET_CODE (tmp) != CONST_INT)
4654 return 0;
4655 scale = INTVAL (tmp);
4656 if ((unsigned HOST_WIDE_INT) scale > 3)
4657 return 0;
4658 scale = 1 << scale;
4659 retval = -1;
4660 }
4661 else
4662 disp = addr; /* displacement */
4663
4664 /* Extract the integral value of scale. */
4665 if (scale_rtx)
4666 {
4667 if (GET_CODE (scale_rtx) != CONST_INT)
4668 return 0;
4669 scale = INTVAL (scale_rtx);
4670 }
4671
4672 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
4673 if (base && index && scale == 1
4674 && (index == arg_pointer_rtx
4675 || index == frame_pointer_rtx
4676 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4677 {
4678 rtx tmp = base;
4679 base = index;
4680 index = tmp;
4681 }
4682
4683 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4684 if ((base == hard_frame_pointer_rtx
4685 || base == frame_pointer_rtx
4686 || base == arg_pointer_rtx) && !disp)
4687 disp = const0_rtx;
4688
4689 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4690 Avoid this by transforming to [%esi+0]. */
4691 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4692 && base && !index && !disp
4693 && REG_P (base)
4694 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4695 disp = const0_rtx;
4696
4697 /* Special case: encode reg+reg instead of reg*2. */
4698 if (!base && index && scale && scale == 2)
4699 base = index, scale = 1;
4700
4701 /* Special case: scaling cannot be encoded without base or displacement. */
4702 if (!base && !disp && index && scale != 1)
4703 disp = const0_rtx;
4704
4705 out->base = base;
4706 out->index = index;
4707 out->disp = disp;
4708 out->scale = scale;
4709 out->seg = seg;
4710
4711 return retval;
4712 }
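/* Editorial examples of the decomposition above (register choices and the
   symbol name "tbl" are illustrative):

       (reg %ebx)                                  -> base = %ebx
       (plus (reg %ebx) (const_int 8))             -> base = %ebx, disp = 8
       (plus (plus (mult (reg %ecx) (const_int 4))
                   (reg %ebx))
             (symbol_ref "tbl"))                   -> base = %ebx, index = %ecx,
                                                      scale = 4, disp = tbl
       (ashift (reg %ecx) (const_int 2))           -> index = %ecx, scale = 4,
                                                      return value -1 (lea only)
*/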
4713 \f
4714 /* Return the cost of the memory address X.
4715 For i386, it is better to use a complex address than let gcc copy
4716 the address into a reg and make a new pseudo. But not if the address
4717 requires two regs - that would mean more pseudos with longer
4718 lifetimes. */
4719 static int
4720 ix86_address_cost (rtx x)
4721 {
4722 struct ix86_address parts;
4723 int cost = 1;
4724
4725 if (!ix86_decompose_address (x, &parts))
4726 abort ();
4727
4728 /* More complex memory references are better. */
4729 if (parts.disp && parts.disp != const0_rtx)
4730 cost--;
4731 if (parts.seg != SEG_DEFAULT)
4732 cost--;
4733
4734 /* Attempt to minimize number of registers in the address. */
4735 if ((parts.base
4736 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4737 || (parts.index
4738 && (!REG_P (parts.index)
4739 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4740 cost++;
4741
4742 if (parts.base
4743 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4744 && parts.index
4745 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4746 && parts.base != parts.index)
4747 cost++;
4748
4749 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4750 since its predecode logic can't detect the length of such instructions
4751 and decoding degenerates to the vector decoder. Increase the cost of such
4752 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4753 to split such addresses or even to refuse them altogether.
4754
4755 The following addressing modes are affected:
4756 [base+scale*index]
4757 [scale*index+disp]
4758 [base+index]
4759
4760 The first and last cases may be avoidable by explicitly coding the zero into
4761 the memory address, but I don't have an AMD-K6 machine handy to check this
4762 theory. */
4763
4764 if (TARGET_K6
4765 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4766 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4767 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4768 cost += 10;
4769
4770 return cost;
4771 }
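/* Editorial worked example of the cost computation above: an address like
   (plus (reg %ebp) (const_int -4)) starts at cost 1 and the nonzero
   displacement subtracts one, giving 0; before reload an address built from
   two distinct pseudo registers with no displacement instead pays +1 for
   using pseudos and +1 for needing two registers, ending at 3, and the
   base+index form with no displacement also triggers the +10 penalty when
   tuning for the K6. */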
4772 \f
4773 /* If X is a machine specific address (i.e. a symbol or label being
4774 referenced as a displacement from the GOT implemented using an
4775 UNSPEC), then return the base term. Otherwise return X. */
4776
4777 rtx
4778 ix86_find_base_term (rtx x)
4779 {
4780 rtx term;
4781
4782 if (TARGET_64BIT)
4783 {
4784 if (GET_CODE (x) != CONST)
4785 return x;
4786 term = XEXP (x, 0);
4787 if (GET_CODE (term) == PLUS
4788 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4789 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4790 term = XEXP (term, 0);
4791 if (GET_CODE (term) != UNSPEC
4792 || XINT (term, 1) != UNSPEC_GOTPCREL)
4793 return x;
4794
4795 term = XVECEXP (term, 0, 0);
4796
4797 if (GET_CODE (term) != SYMBOL_REF
4798 && GET_CODE (term) != LABEL_REF)
4799 return x;
4800
4801 return term;
4802 }
4803
4804 term = ix86_delegitimize_address (x);
4805
4806 if (GET_CODE (term) != SYMBOL_REF
4807 && GET_CODE (term) != LABEL_REF)
4808 return x;
4809
4810 return term;
4811 }
4812
4813 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4814 this is used to form addresses to local data when -fPIC is in
4815 use. */
4816
4817 static bool
4818 darwin_local_data_pic (rtx disp)
4819 {
4820 if (GET_CODE (disp) == MINUS)
4821 {
4822 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4823 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4824 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4825 {
4826 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4827 if (! strcmp (sym_name, "<pic base>"))
4828 return true;
4829 }
4830 }
4831
4832 return false;
4833 }
4834 \f
4835 /* Determine if a given RTX is a valid constant. We already know this
4836 satisfies CONSTANT_P. */
4837
4838 bool
4839 legitimate_constant_p (rtx x)
4840 {
4841 switch (GET_CODE (x))
4842 {
4843 case CONST:
4844 x = XEXP (x, 0);
4845
4846 if (GET_CODE (x) == PLUS)
4847 {
4848 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4849 return false;
4850 x = XEXP (x, 0);
4851 }
4852
4853 if (TARGET_MACHO && darwin_local_data_pic (x))
4854 return true;
4855
4856 /* Only some unspecs are valid as "constants". */
4857 if (GET_CODE (x) == UNSPEC)
4858 switch (XINT (x, 1))
4859 {
4860 case UNSPEC_TPOFF:
4861 case UNSPEC_NTPOFF:
4862 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4863 case UNSPEC_DTPOFF:
4864 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4865 default:
4866 return false;
4867 }
4868
4869 /* We must have drilled down to a symbol. */
4870 if (!symbolic_operand (x, Pmode))
4871 return false;
4872 /* FALLTHRU */
4873
4874 case SYMBOL_REF:
4875 /* TLS symbols are never valid. */
4876 if (tls_symbolic_operand (x, Pmode))
4877 return false;
4878 break;
4879
4880 default:
4881 break;
4882 }
4883
4884 /* Otherwise we handle everything else in the move patterns. */
4885 return true;
4886 }
4887
4888 /* Determine if it's legal to put X into the constant pool. This
4889 is not possible for the address of thread-local symbols, which
4890 is checked above. */
4891
4892 static bool
4893 ix86_cannot_force_const_mem (rtx x)
4894 {
4895 return !legitimate_constant_p (x);
4896 }
4897
4898 /* Determine if a given RTX is a valid constant address. */
4899
4900 bool
4901 constant_address_p (rtx x)
4902 {
4903 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4904 }
4905
4906 /* Nonzero if the constant value X is a legitimate general operand
4907 when generating PIC code. It is given that flag_pic is on and
4908 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4909
4910 bool
4911 legitimate_pic_operand_p (rtx x)
4912 {
4913 rtx inner;
4914
4915 switch (GET_CODE (x))
4916 {
4917 case CONST:
4918 inner = XEXP (x, 0);
4919
4920 /* Only some unspecs are valid as "constants". */
4921 if (GET_CODE (inner) == UNSPEC)
4922 switch (XINT (inner, 1))
4923 {
4924 case UNSPEC_TPOFF:
4925 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4926 default:
4927 return false;
4928 }
4929 /* FALLTHRU */
4930
4931 case SYMBOL_REF:
4932 case LABEL_REF:
4933 return legitimate_pic_address_disp_p (x);
4934
4935 default:
4936 return true;
4937 }
4938 }
4939
4940 /* Determine if a given CONST RTX is a valid memory displacement
4941 in PIC mode. */
4942
4943 int
4944 legitimate_pic_address_disp_p (rtx disp)
4945 {
4946 bool saw_plus;
4947
4948 /* In 64bit mode we can allow direct addresses of symbols and labels
4949 when they are not dynamic symbols. */
4950 if (TARGET_64BIT)
4951 {
4952 /* TLS references should always be enclosed in UNSPEC. */
4953 if (tls_symbolic_operand (disp, GET_MODE (disp)))
4954 return 0;
4955 if (GET_CODE (disp) == SYMBOL_REF
4956 && ix86_cmodel == CM_SMALL_PIC
4957 && SYMBOL_REF_LOCAL_P (disp))
4958 return 1;
4959 if (GET_CODE (disp) == LABEL_REF)
4960 return 1;
4961 if (GET_CODE (disp) == CONST
4962 && GET_CODE (XEXP (disp, 0)) == PLUS)
4963 {
4964 rtx op0 = XEXP (XEXP (disp, 0), 0);
4965 rtx op1 = XEXP (XEXP (disp, 0), 1);
4966
4967 /* TLS references should always be enclosed in UNSPEC. */
4968 if (tls_symbolic_operand (op0, GET_MODE (op0)))
4969 return 0;
4970 if (((GET_CODE (op0) == SYMBOL_REF
4971 && ix86_cmodel == CM_SMALL_PIC
4972 && SYMBOL_REF_LOCAL_P (op0))
4973 || GET_CODE (op0) == LABEL_REF)
4974 && GET_CODE (op1) == CONST_INT
4975 && INTVAL (op1) < 16*1024*1024
4976 && INTVAL (op1) >= -16*1024*1024)
4977 return 1;
4978 }
4979 }
4980 if (GET_CODE (disp) != CONST)
4981 return 0;
4982 disp = XEXP (disp, 0);
4983
4984 if (TARGET_64BIT)
4985 {
4986 /* It is not safe to allow PLUS expressions; this limits the allowed distance
4987 of GOT tables. We should not need these anyway. */
4988 if (GET_CODE (disp) != UNSPEC
4989 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4990 return 0;
4991
4992 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4993 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4994 return 0;
4995 return 1;
4996 }
4997
4998 saw_plus = false;
4999 if (GET_CODE (disp) == PLUS)
5000 {
5001 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5002 return 0;
5003 disp = XEXP (disp, 0);
5004 saw_plus = true;
5005 }
5006
5007 if (TARGET_MACHO && darwin_local_data_pic (disp))
5008 return 1;
5009
5010 if (GET_CODE (disp) != UNSPEC)
5011 return 0;
5012
5013 switch (XINT (disp, 1))
5014 {
5015 case UNSPEC_GOT:
5016 if (saw_plus)
5017 return false;
5018 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5019 case UNSPEC_GOTOFF:
5020 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5021 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5022 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5023 return false;
5024 case UNSPEC_GOTTPOFF:
5025 case UNSPEC_GOTNTPOFF:
5026 case UNSPEC_INDNTPOFF:
5027 if (saw_plus)
5028 return false;
5029 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5030 case UNSPEC_NTPOFF:
5031 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5032 case UNSPEC_DTPOFF:
5033 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5034 }
5035
5036 return 0;
5037 }
5038
5039 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5040 memory address for an instruction. The MODE argument is the machine mode
5041 for the MEM expression that wants to use this address.
5042
5043 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5044 convert common non-canonical forms to canonical form so that they will
5045 be recognized. */
5046
5047 int
5048 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5049 {
5050 struct ix86_address parts;
5051 rtx base, index, disp;
5052 HOST_WIDE_INT scale;
5053 const char *reason = NULL;
5054 rtx reason_rtx = NULL_RTX;
5055
5056 if (TARGET_DEBUG_ADDR)
5057 {
5058 fprintf (stderr,
5059 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5060 GET_MODE_NAME (mode), strict);
5061 debug_rtx (addr);
5062 }
5063
5064 if (ix86_decompose_address (addr, &parts) <= 0)
5065 {
5066 reason = "decomposition failed";
5067 goto report_error;
5068 }
5069
5070 base = parts.base;
5071 index = parts.index;
5072 disp = parts.disp;
5073 scale = parts.scale;
5074
5075 /* Validate base register.
5076
5077 Don't allow SUBREG's here, it can lead to spill failures when the base
5078 is one word out of a two word structure, which is represented internally
5079 as a DImode int. */
5080
5081 if (base)
5082 {
5083 reason_rtx = base;
5084
5085 if (GET_CODE (base) != REG)
5086 {
5087 reason = "base is not a register";
5088 goto report_error;
5089 }
5090
5091 if (GET_MODE (base) != Pmode)
5092 {
5093 reason = "base is not in Pmode";
5094 goto report_error;
5095 }
5096
5097 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5098 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5099 {
5100 reason = "base is not valid";
5101 goto report_error;
5102 }
5103 }
5104
5105 /* Validate index register.
5106
5107 Don't allow SUBREG's here, it can lead to spill failures when the index
5108 is one word out of a two word structure, which is represented internally
5109 as a DImode int. */
5110
5111 if (index)
5112 {
5113 reason_rtx = index;
5114
5115 if (GET_CODE (index) != REG)
5116 {
5117 reason = "index is not a register";
5118 goto report_error;
5119 }
5120
5121 if (GET_MODE (index) != Pmode)
5122 {
5123 reason = "index is not in Pmode";
5124 goto report_error;
5125 }
5126
5127 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5128 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5129 {
5130 reason = "index is not valid";
5131 goto report_error;
5132 }
5133 }
5134
5135 /* Validate scale factor. */
5136 if (scale != 1)
5137 {
5138 reason_rtx = GEN_INT (scale);
5139 if (!index)
5140 {
5141 reason = "scale without index";
5142 goto report_error;
5143 }
5144
5145 if (scale != 2 && scale != 4 && scale != 8)
5146 {
5147 reason = "scale is not a valid multiplier";
5148 goto report_error;
5149 }
5150 }
5151
5152 /* Validate displacement. */
5153 if (disp)
5154 {
5155 reason_rtx = disp;
5156
5157 if (GET_CODE (disp) == CONST
5158 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5159 switch (XINT (XEXP (disp, 0), 1))
5160 {
5161 case UNSPEC_GOT:
5162 case UNSPEC_GOTOFF:
5163 case UNSPEC_GOTPCREL:
5164 if (!flag_pic)
5165 abort ();
5166 goto is_legitimate_pic;
5167
5168 case UNSPEC_GOTTPOFF:
5169 case UNSPEC_GOTNTPOFF:
5170 case UNSPEC_INDNTPOFF:
5171 case UNSPEC_NTPOFF:
5172 case UNSPEC_DTPOFF:
5173 break;
5174
5175 default:
5176 reason = "invalid address unspec";
5177 goto report_error;
5178 }
5179
5180 else if (flag_pic && (SYMBOLIC_CONST (disp)
5181 #if TARGET_MACHO
5182 && !machopic_operand_p (disp)
5183 #endif
5184 ))
5185 {
5186 is_legitimate_pic:
5187 if (TARGET_64BIT && (index || base))
5188 {
5189 /* foo@dtpoff(%rX) is ok. */
5190 if (GET_CODE (disp) != CONST
5191 || GET_CODE (XEXP (disp, 0)) != PLUS
5192 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5193 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5194 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5195 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5196 {
5197 reason = "non-constant pic memory reference";
5198 goto report_error;
5199 }
5200 }
5201 else if (! legitimate_pic_address_disp_p (disp))
5202 {
5203 reason = "displacement is an invalid pic construct";
5204 goto report_error;
5205 }
5206
5207 /* This code used to verify that a symbolic pic displacement
5208 includes the pic_offset_table_rtx register.
5209
5210 While this is a good idea, unfortunately these constructs may
5211 be created by the "adds using lea" optimization for incorrect
5212 code like:
5213
5214 int a;
5215 int foo(int i)
5216 {
5217 return *(&a+i);
5218 }
5219
5220 This code is nonsensical, but it results in addressing the
5221 GOT table with pic_offset_table_rtx as the base. We can't
5222 just refuse it easily, since it gets matched by the
5223 "addsi3" pattern, which later gets split into an lea when
5224 the output register differs from the input. While this
5225 could be handled by a separate addsi pattern for this case
5226 that never results in an lea, disabling this test seems to
5227 be the easier and correct fix for the crash. */
5228 }
5229 else if (GET_CODE (disp) != LABEL_REF
5230 && GET_CODE (disp) != CONST_INT
5231 && (GET_CODE (disp) != CONST
5232 || !legitimate_constant_p (disp))
5233 && (GET_CODE (disp) != SYMBOL_REF
5234 || !legitimate_constant_p (disp)))
5235 {
5236 reason = "displacement is not constant";
5237 goto report_error;
5238 }
5239 else if (TARGET_64BIT
5240 && !x86_64_immediate_operand (disp, VOIDmode))
5241 {
5242 reason = "displacement is out of range";
5243 goto report_error;
5244 }
5245 }
5246
5247 /* Everything looks valid. */
5248 if (TARGET_DEBUG_ADDR)
5249 fprintf (stderr, "Success.\n");
5250 return TRUE;
5251
5252 report_error:
5253 if (TARGET_DEBUG_ADDR)
5254 {
5255 fprintf (stderr, "Error: %s\n", reason);
5256 debug_rtx (reason_rtx);
5257 }
5258 return FALSE;
5259 }
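
/* As an informal illustration of the checks above: a canonical address
   corresponds directly to the x86 addressing mode base + index*scale + disp,
   e.g.

	(plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 16))

   which assembles as 16(%ebx,%eax,4) in AT&T syntax if the index register
   happens to be %eax and the base %ebx.  A scale of 3, a scale without an
   index, or a non-constant displacement trips one of the "reason" strings
   above instead.  */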
5260 \f
5261 /* Return a unique alias set for the GOT. */
5262
5263 static HOST_WIDE_INT
5264 ix86_GOT_alias_set (void)
5265 {
5266 static HOST_WIDE_INT set = -1;
5267 if (set == -1)
5268 set = new_alias_set ();
5269 return set;
5270 }
5271
5272 /* Return a legitimate reference for ORIG (an address) using the
5273 register REG. If REG is 0, a new pseudo is generated.
5274
5275 There are two types of references that must be handled:
5276
5277 1. Global data references must load the address from the GOT, via
5278 the PIC reg. An insn is emitted to do this load, and the reg is
5279 returned.
5280
5281 2. Static data references, constant pool addresses, and code labels
5282 compute the address as an offset from the GOT, whose base is in
5283 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5284 differentiate them from global data objects. The returned
5285 address is the PIC reg + an unspec constant.
5286
5287 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5288 reg also appears in the address. */
5289
5290 static rtx
5291 legitimize_pic_address (rtx orig, rtx reg)
5292 {
5293 rtx addr = orig;
5294 rtx new = orig;
5295 rtx base;
5296
5297 #if TARGET_MACHO
5298 if (reg == 0)
5299 reg = gen_reg_rtx (Pmode);
5300 /* Use the generic Mach-O PIC machinery. */
5301 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5302 #endif
5303
5304 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5305 new = addr;
5306 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5307 {
5308 /* This symbol may be referenced via a displacement from the PIC
5309 base address (@GOTOFF). */
5310
5311 if (reload_in_progress)
5312 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5313 if (GET_CODE (addr) == CONST)
5314 addr = XEXP (addr, 0);
5315 if (GET_CODE (addr) == PLUS)
5316 {
5317 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5318 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5319 }
5320 else
5321 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5322 new = gen_rtx_CONST (Pmode, new);
5323 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5324
5325 if (reg != 0)
5326 {
5327 emit_move_insn (reg, new);
5328 new = reg;
5329 }
5330 }
5331 else if (GET_CODE (addr) == SYMBOL_REF)
5332 {
5333 if (TARGET_64BIT)
5334 {
5335 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5336 new = gen_rtx_CONST (Pmode, new);
5337 new = gen_const_mem (Pmode, new);
5338 set_mem_alias_set (new, ix86_GOT_alias_set ());
5339
5340 if (reg == 0)
5341 reg = gen_reg_rtx (Pmode);
5342 /* Use gen_movsi directly; otherwise the address is loaded
5343 into a register for CSE. We don't want to CSE these addresses;
5344 instead we CSE addresses from the GOT table, so skip this. */
5345 emit_insn (gen_movsi (reg, new));
5346 new = reg;
5347 }
5348 else
5349 {
5350 /* This symbol must be referenced via a load from the
5351 Global Offset Table (@GOT). */
5352
5353 if (reload_in_progress)
5354 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5355 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5356 new = gen_rtx_CONST (Pmode, new);
5357 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5358 new = gen_const_mem (Pmode, new);
5359 set_mem_alias_set (new, ix86_GOT_alias_set ());
5360
5361 if (reg == 0)
5362 reg = gen_reg_rtx (Pmode);
5363 emit_move_insn (reg, new);
5364 new = reg;
5365 }
5366 }
5367 else
5368 {
5369 if (GET_CODE (addr) == CONST)
5370 {
5371 addr = XEXP (addr, 0);
5372
5373 /* We must match what we generated earlier. Assume the only
5374 unspecs that can get here are ours. Not that we could do
5375 anything with them anyway.... */
5376 if (GET_CODE (addr) == UNSPEC
5377 || (GET_CODE (addr) == PLUS
5378 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5379 return orig;
5380 if (GET_CODE (addr) != PLUS)
5381 abort ();
5382 }
5383 if (GET_CODE (addr) == PLUS)
5384 {
5385 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5386
5387 /* Check first to see if this is a constant offset from a @GOTOFF
5388 symbol reference. */
5389 if (local_symbolic_operand (op0, Pmode)
5390 && GET_CODE (op1) == CONST_INT)
5391 {
5392 if (!TARGET_64BIT)
5393 {
5394 if (reload_in_progress)
5395 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5396 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5397 UNSPEC_GOTOFF);
5398 new = gen_rtx_PLUS (Pmode, new, op1);
5399 new = gen_rtx_CONST (Pmode, new);
5400 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5401
5402 if (reg != 0)
5403 {
5404 emit_move_insn (reg, new);
5405 new = reg;
5406 }
5407 }
5408 else
5409 {
5410 if (INTVAL (op1) < -16*1024*1024
5411 || INTVAL (op1) >= 16*1024*1024)
5412 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5413 }
5414 }
5415 else
5416 {
5417 base = legitimize_pic_address (XEXP (addr, 0), reg);
5418 new = legitimize_pic_address (XEXP (addr, 1),
5419 base == reg ? NULL_RTX : reg);
5420
5421 if (GET_CODE (new) == CONST_INT)
5422 new = plus_constant (base, INTVAL (new));
5423 else
5424 {
5425 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5426 {
5427 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5428 new = XEXP (new, 1);
5429 }
5430 new = gen_rtx_PLUS (Pmode, base, new);
5431 }
5432 }
5433 }
5434 }
5435 return new;
5436 }
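
/* A rough sketch of the forms produced above, for the usual 32-bit ELF
   case (details differ for Mach-O): a local symbol is rewritten as

	(plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOTOFF)))

   which prints as symbol@GOTOFF(%ebx), while a global symbol becomes a
   load from its GOT slot, i.e. a MEM of

	(plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOT)))

   printed as symbol@GOT(%ebx).  On x86-64 the GOT slot is instead
   addressed RIP-relatively through symbol@GOTPCREL(%rip).  */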
5437 \f
5438 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5439
5440 static rtx
5441 get_thread_pointer (int to_reg)
5442 {
5443 rtx tp, reg, insn;
5444
5445 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5446 if (!to_reg)
5447 return tp;
5448
5449 reg = gen_reg_rtx (Pmode);
5450 insn = gen_rtx_SET (VOIDmode, reg, tp);
5451 insn = emit_insn (insn);
5452
5453 return reg;
5454 }
5455
5456 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5457 false if we expect this to be used for a memory address and true if
5458 we expect to load the address into a register. */
5459
5460 static rtx
5461 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5462 {
5463 rtx dest, base, off, pic;
5464 int type;
5465
5466 switch (model)
5467 {
5468 case TLS_MODEL_GLOBAL_DYNAMIC:
5469 dest = gen_reg_rtx (Pmode);
5470 if (TARGET_64BIT)
5471 {
5472 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5473
5474 start_sequence ();
5475 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5476 insns = get_insns ();
5477 end_sequence ();
5478
5479 emit_libcall_block (insns, dest, rax, x);
5480 }
5481 else
5482 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5483 break;
5484
5485 case TLS_MODEL_LOCAL_DYNAMIC:
5486 base = gen_reg_rtx (Pmode);
5487 if (TARGET_64BIT)
5488 {
5489 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5490
5491 start_sequence ();
5492 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5493 insns = get_insns ();
5494 end_sequence ();
5495
5496 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5497 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5498 emit_libcall_block (insns, base, rax, note);
5499 }
5500 else
5501 emit_insn (gen_tls_local_dynamic_base_32 (base));
5502
5503 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5504 off = gen_rtx_CONST (Pmode, off);
5505
5506 return gen_rtx_PLUS (Pmode, base, off);
5507
5508 case TLS_MODEL_INITIAL_EXEC:
5509 if (TARGET_64BIT)
5510 {
5511 pic = NULL;
5512 type = UNSPEC_GOTNTPOFF;
5513 }
5514 else if (flag_pic)
5515 {
5516 if (reload_in_progress)
5517 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5518 pic = pic_offset_table_rtx;
5519 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5520 }
5521 else if (!TARGET_GNU_TLS)
5522 {
5523 pic = gen_reg_rtx (Pmode);
5524 emit_insn (gen_set_got (pic));
5525 type = UNSPEC_GOTTPOFF;
5526 }
5527 else
5528 {
5529 pic = NULL;
5530 type = UNSPEC_INDNTPOFF;
5531 }
5532
5533 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5534 off = gen_rtx_CONST (Pmode, off);
5535 if (pic)
5536 off = gen_rtx_PLUS (Pmode, pic, off);
5537 off = gen_const_mem (Pmode, off);
5538 set_mem_alias_set (off, ix86_GOT_alias_set ());
5539
5540 if (TARGET_64BIT || TARGET_GNU_TLS)
5541 {
5542 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5543 off = force_reg (Pmode, off);
5544 return gen_rtx_PLUS (Pmode, base, off);
5545 }
5546 else
5547 {
5548 base = get_thread_pointer (true);
5549 dest = gen_reg_rtx (Pmode);
5550 emit_insn (gen_subsi3 (dest, base, off));
5551 }
5552 break;
5553
5554 case TLS_MODEL_LOCAL_EXEC:
5555 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5556 (TARGET_64BIT || TARGET_GNU_TLS)
5557 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5558 off = gen_rtx_CONST (Pmode, off);
5559
5560 if (TARGET_64BIT || TARGET_GNU_TLS)
5561 {
5562 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5563 return gen_rtx_PLUS (Pmode, base, off);
5564 }
5565 else
5566 {
5567 base = get_thread_pointer (true);
5568 dest = gen_reg_rtx (Pmode);
5569 emit_insn (gen_subsi3 (dest, base, off));
5570 }
5571 break;
5572
5573 default:
5574 abort ();
5575 }
5576
5577 return dest;
5578 }
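
/* As a rough guide to the four models handled above, consider

	__thread int t;
	int get (void) { return t; }

   TLS_MODEL_GLOBAL_DYNAMIC obtains the address from a tls_get_addr call;
   TLS_MODEL_LOCAL_DYNAMIC makes one such call for the module base and then
   adds t@DTPOFF; TLS_MODEL_INITIAL_EXEC loads the offset of t from the GOT
   (@GOTTPOFF, @GOTNTPOFF or @INDNTPOFF) and combines it with the thread
   pointer; TLS_MODEL_LOCAL_EXEC folds the constant offset (@TPOFF or
   @NTPOFF) in directly.  Which relocation is chosen depends on
   TARGET_64BIT, flag_pic and TARGET_GNU_TLS, as the switch above shows.  */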
5579
5580 /* Try machine-dependent ways of modifying an illegitimate address
5581 to be legitimate. If we find one, return the new, valid address.
5582 This macro is used in only one place: `memory_address' in explow.c.
5583
5584 OLDX is the address as it was before break_out_memory_refs was called.
5585 In some cases it is useful to look at this to decide what needs to be done.
5586
5587 MODE and WIN are passed so that this macro can use
5588 GO_IF_LEGITIMATE_ADDRESS.
5589
5590 It is always safe for this macro to do nothing. It exists to recognize
5591 opportunities to optimize the output.
5592
5593 For the 80386, we handle X+REG by loading X into a register R and
5594 using R+REG. R will go in a general reg and indexing will be used.
5595 However, if REG is a broken-out memory address or multiplication,
5596 nothing needs to be done because REG can certainly go in a general reg.
5597
5598 When -fpic is used, special handling is needed for symbolic references.
5599 See comments by legitimize_pic_address in i386.c for details. */
5600
5601 rtx
5602 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5603 {
5604 int changed = 0;
5605 unsigned log;
5606
5607 if (TARGET_DEBUG_ADDR)
5608 {
5609 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5610 GET_MODE_NAME (mode));
5611 debug_rtx (x);
5612 }
5613
5614 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5615 if (log)
5616 return legitimize_tls_address (x, log, false);
5617 if (GET_CODE (x) == CONST
5618 && GET_CODE (XEXP (x, 0)) == PLUS
5619 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5620 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5621 {
5622 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5623 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5624 }
5625
5626 if (flag_pic && SYMBOLIC_CONST (x))
5627 return legitimize_pic_address (x, 0);
5628
5629 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5630 if (GET_CODE (x) == ASHIFT
5631 && GET_CODE (XEXP (x, 1)) == CONST_INT
5632 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5633 {
5634 changed = 1;
5635 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5636 GEN_INT (1 << log));
5637 }
5638
5639 if (GET_CODE (x) == PLUS)
5640 {
5641 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5642
5643 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5644 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5645 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5646 {
5647 changed = 1;
5648 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5649 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5650 GEN_INT (1 << log));
5651 }
5652
5653 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5654 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5655 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5656 {
5657 changed = 1;
5658 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5659 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5660 GEN_INT (1 << log));
5661 }
5662
5663 /* Put multiply first if it isn't already. */
5664 if (GET_CODE (XEXP (x, 1)) == MULT)
5665 {
5666 rtx tmp = XEXP (x, 0);
5667 XEXP (x, 0) = XEXP (x, 1);
5668 XEXP (x, 1) = tmp;
5669 changed = 1;
5670 }
5671
5672 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5673 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5674 created by virtual register instantiation, register elimination, and
5675 similar optimizations. */
5676 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5677 {
5678 changed = 1;
5679 x = gen_rtx_PLUS (Pmode,
5680 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5681 XEXP (XEXP (x, 1), 0)),
5682 XEXP (XEXP (x, 1), 1));
5683 }
5684
5685 /* Canonicalize
5686 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5687 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5688 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5689 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5690 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5691 && CONSTANT_P (XEXP (x, 1)))
5692 {
5693 rtx constant;
5694 rtx other = NULL_RTX;
5695
5696 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5697 {
5698 constant = XEXP (x, 1);
5699 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5700 }
5701 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5702 {
5703 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5704 other = XEXP (x, 1);
5705 }
5706 else
5707 constant = 0;
5708
5709 if (constant)
5710 {
5711 changed = 1;
5712 x = gen_rtx_PLUS (Pmode,
5713 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5714 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5715 plus_constant (other, INTVAL (constant)));
5716 }
5717 }
5718
5719 if (changed && legitimate_address_p (mode, x, FALSE))
5720 return x;
5721
5722 if (GET_CODE (XEXP (x, 0)) == MULT)
5723 {
5724 changed = 1;
5725 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5726 }
5727
5728 if (GET_CODE (XEXP (x, 1)) == MULT)
5729 {
5730 changed = 1;
5731 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5732 }
5733
5734 if (changed
5735 && GET_CODE (XEXP (x, 1)) == REG
5736 && GET_CODE (XEXP (x, 0)) == REG)
5737 return x;
5738
5739 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5740 {
5741 changed = 1;
5742 x = legitimize_pic_address (x, 0);
5743 }
5744
5745 if (changed && legitimate_address_p (mode, x, FALSE))
5746 return x;
5747
5748 if (GET_CODE (XEXP (x, 0)) == REG)
5749 {
5750 rtx temp = gen_reg_rtx (Pmode);
5751 rtx val = force_operand (XEXP (x, 1), temp);
5752 if (val != temp)
5753 emit_move_insn (temp, val);
5754
5755 XEXP (x, 1) = temp;
5756 return x;
5757 }
5758
5759 else if (GET_CODE (XEXP (x, 1)) == REG)
5760 {
5761 rtx temp = gen_reg_rtx (Pmode);
5762 rtx val = force_operand (XEXP (x, 0), temp);
5763 if (val != temp)
5764 emit_move_insn (temp, val);
5765
5766 XEXP (x, 0) = temp;
5767 return x;
5768 }
5769 }
5770
5771 return x;
5772 }
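
/* For illustration, an access such as a[i] with `int a[10]' on the frame
   may reach this function looking like

	(plus (mult (reg) (const_int 4)) (plus (reg) (const_int -40)))

   after register elimination; the rewrites above turn it into the
   canonical

	(plus (plus (mult (reg) (const_int 4)) (reg)) (const_int -40))

   which legitimate_address_p then accepts.  The constants are of course
   just an example.  */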
5773 \f
5774 /* Print an integer constant expression in assembler syntax. Addition
5775 and subtraction are the only arithmetic that may appear in these
5776 expressions. FILE is the stdio stream to write to, X is the rtx, and
5777 CODE is the operand print code from the output string. */
5778
5779 static void
5780 output_pic_addr_const (FILE *file, rtx x, int code)
5781 {
5782 char buf[256];
5783
5784 switch (GET_CODE (x))
5785 {
5786 case PC:
5787 if (flag_pic)
5788 putc ('.', file);
5789 else
5790 abort ();
5791 break;
5792
5793 case SYMBOL_REF:
5794 /* Mark the decl as referenced so that cgraph will output the function. */
5795 if (SYMBOL_REF_DECL (x))
5796 mark_decl_referenced (SYMBOL_REF_DECL (x));
5797
5798 assemble_name (file, XSTR (x, 0));
5799 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5800 fputs ("@PLT", file);
5801 break;
5802
5803 case LABEL_REF:
5804 x = XEXP (x, 0);
5805 /* FALLTHRU */
5806 case CODE_LABEL:
5807 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5808 assemble_name (asm_out_file, buf);
5809 break;
5810
5811 case CONST_INT:
5812 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5813 break;
5814
5815 case CONST:
5816 /* This used to output parentheses around the expression,
5817 but that does not work on the 386 (either ATT or BSD assembler). */
5818 output_pic_addr_const (file, XEXP (x, 0), code);
5819 break;
5820
5821 case CONST_DOUBLE:
5822 if (GET_MODE (x) == VOIDmode)
5823 {
5824 /* We can use %d if the number is <32 bits and positive. */
5825 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5826 fprintf (file, "0x%lx%08lx",
5827 (unsigned long) CONST_DOUBLE_HIGH (x),
5828 (unsigned long) CONST_DOUBLE_LOW (x));
5829 else
5830 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5831 }
5832 else
5833 /* We can't handle floating point constants;
5834 PRINT_OPERAND must handle them. */
5835 output_operand_lossage ("floating constant misused");
5836 break;
5837
5838 case PLUS:
5839 /* Some assemblers need integer constants to appear first. */
5840 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5841 {
5842 output_pic_addr_const (file, XEXP (x, 0), code);
5843 putc ('+', file);
5844 output_pic_addr_const (file, XEXP (x, 1), code);
5845 }
5846 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5847 {
5848 output_pic_addr_const (file, XEXP (x, 1), code);
5849 putc ('+', file);
5850 output_pic_addr_const (file, XEXP (x, 0), code);
5851 }
5852 else
5853 abort ();
5854 break;
5855
5856 case MINUS:
5857 if (!TARGET_MACHO)
5858 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5859 output_pic_addr_const (file, XEXP (x, 0), code);
5860 putc ('-', file);
5861 output_pic_addr_const (file, XEXP (x, 1), code);
5862 if (!TARGET_MACHO)
5863 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5864 break;
5865
5866 case UNSPEC:
5867 if (XVECLEN (x, 0) != 1)
5868 abort ();
5869 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5870 switch (XINT (x, 1))
5871 {
5872 case UNSPEC_GOT:
5873 fputs ("@GOT", file);
5874 break;
5875 case UNSPEC_GOTOFF:
5876 fputs ("@GOTOFF", file);
5877 break;
5878 case UNSPEC_GOTPCREL:
5879 fputs ("@GOTPCREL(%rip)", file);
5880 break;
5881 case UNSPEC_GOTTPOFF:
5882 /* FIXME: This might be @TPOFF in Sun ld too. */
5883 fputs ("@GOTTPOFF", file);
5884 break;
5885 case UNSPEC_TPOFF:
5886 fputs ("@TPOFF", file);
5887 break;
5888 case UNSPEC_NTPOFF:
5889 if (TARGET_64BIT)
5890 fputs ("@TPOFF", file);
5891 else
5892 fputs ("@NTPOFF", file);
5893 break;
5894 case UNSPEC_DTPOFF:
5895 fputs ("@DTPOFF", file);
5896 break;
5897 case UNSPEC_GOTNTPOFF:
5898 if (TARGET_64BIT)
5899 fputs ("@GOTTPOFF(%rip)", file);
5900 else
5901 fputs ("@GOTNTPOFF", file);
5902 break;
5903 case UNSPEC_INDNTPOFF:
5904 fputs ("@INDNTPOFF", file);
5905 break;
5906 default:
5907 output_operand_lossage ("invalid UNSPEC as operand");
5908 break;
5909 }
5910 break;
5911
5912 default:
5913 output_operand_lossage ("invalid expression as operand");
5914 }
5915 }
5916
5917 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5918 We need to handle our special PIC relocations. */
5919
5920 void
5921 i386_dwarf_output_addr_const (FILE *file, rtx x)
5922 {
5923 #ifdef ASM_QUAD
5924 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5925 #else
5926 if (TARGET_64BIT)
5927 abort ();
5928 fprintf (file, "%s", ASM_LONG);
5929 #endif
5930 if (flag_pic)
5931 output_pic_addr_const (file, x, '\0');
5932 else
5933 output_addr_const (file, x);
5934 fputc ('\n', file);
5935 }
5936
5937 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5938 We need to emit DTP-relative relocations. */
5939
5940 void
5941 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5942 {
5943 fputs (ASM_LONG, file);
5944 output_addr_const (file, x);
5945 fputs ("@DTPOFF", file);
5946 switch (size)
5947 {
5948 case 4:
5949 break;
5950 case 8:
5951 fputs (", 0", file);
5952 break;
5953 default:
5954 abort ();
5955 }
5956 }
5957
5958 /* In the name of slightly smaller debug output, and to cater to
5959 general assembler lossage, recognize PIC+GOTOFF and turn it back
5960 into a direct symbol reference. */
5961
5962 static rtx
5963 ix86_delegitimize_address (rtx orig_x)
5964 {
5965 rtx x = orig_x, y;
5966
5967 if (GET_CODE (x) == MEM)
5968 x = XEXP (x, 0);
5969
5970 if (TARGET_64BIT)
5971 {
5972 if (GET_CODE (x) != CONST
5973 || GET_CODE (XEXP (x, 0)) != UNSPEC
5974 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5975 || GET_CODE (orig_x) != MEM)
5976 return orig_x;
5977 return XVECEXP (XEXP (x, 0), 0, 0);
5978 }
5979
5980 if (GET_CODE (x) != PLUS
5981 || GET_CODE (XEXP (x, 1)) != CONST)
5982 return orig_x;
5983
5984 if (GET_CODE (XEXP (x, 0)) == REG
5985 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5986 /* %ebx + GOT/GOTOFF */
5987 y = NULL;
5988 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5989 {
5990 /* %ebx + %reg * scale + GOT/GOTOFF */
5991 y = XEXP (x, 0);
5992 if (GET_CODE (XEXP (y, 0)) == REG
5993 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5994 y = XEXP (y, 1);
5995 else if (GET_CODE (XEXP (y, 1)) == REG
5996 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5997 y = XEXP (y, 0);
5998 else
5999 return orig_x;
6000 if (GET_CODE (y) != REG
6001 && GET_CODE (y) != MULT
6002 && GET_CODE (y) != ASHIFT)
6003 return orig_x;
6004 }
6005 else
6006 return orig_x;
6007
6008 x = XEXP (XEXP (x, 1), 0);
6009 if (GET_CODE (x) == UNSPEC
6010 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6011 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6012 {
6013 if (y)
6014 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6015 return XVECEXP (x, 0, 0);
6016 }
6017
6018 if (GET_CODE (x) == PLUS
6019 && GET_CODE (XEXP (x, 0)) == UNSPEC
6020 && GET_CODE (XEXP (x, 1)) == CONST_INT
6021 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6022 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6023 && GET_CODE (orig_x) != MEM)))
6024 {
6025 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6026 if (y)
6027 return gen_rtx_PLUS (Pmode, y, x);
6028 return x;
6029 }
6030
6031 return orig_x;
6032 }
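
/* For example, a 32-bit reference that was previously legitimized into

	(plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOTOFF)))

   is turned back into plain `symbol' here (together with any register or
   index term that was riding along), purely so that debug and dwarf
   output name the symbol rather than the GOT arithmetic.  */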
6033 \f
6034 static void
6035 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6036 int fp, FILE *file)
6037 {
6038 const char *suffix;
6039
6040 if (mode == CCFPmode || mode == CCFPUmode)
6041 {
6042 enum rtx_code second_code, bypass_code;
6043 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6044 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6045 abort ();
6046 code = ix86_fp_compare_code_to_integer (code);
6047 mode = CCmode;
6048 }
6049 if (reverse)
6050 code = reverse_condition (code);
6051
6052 switch (code)
6053 {
6054 case EQ:
6055 suffix = "e";
6056 break;
6057 case NE:
6058 suffix = "ne";
6059 break;
6060 case GT:
6061 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6062 abort ();
6063 suffix = "g";
6064 break;
6065 case GTU:
6066 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6067 Those same assemblers have the same but opposite lossage on cmov. */
6068 if (mode != CCmode)
6069 abort ();
6070 suffix = fp ? "nbe" : "a";
6071 break;
6072 case LT:
6073 if (mode == CCNOmode || mode == CCGOCmode)
6074 suffix = "s";
6075 else if (mode == CCmode || mode == CCGCmode)
6076 suffix = "l";
6077 else
6078 abort ();
6079 break;
6080 case LTU:
6081 if (mode != CCmode)
6082 abort ();
6083 suffix = "b";
6084 break;
6085 case GE:
6086 if (mode == CCNOmode || mode == CCGOCmode)
6087 suffix = "ns";
6088 else if (mode == CCmode || mode == CCGCmode)
6089 suffix = "ge";
6090 else
6091 abort ();
6092 break;
6093 case GEU:
6094 /* ??? As above. */
6095 if (mode != CCmode)
6096 abort ();
6097 suffix = fp ? "nb" : "ae";
6098 break;
6099 case LE:
6100 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6101 abort ();
6102 suffix = "le";
6103 break;
6104 case LEU:
6105 if (mode != CCmode)
6106 abort ();
6107 suffix = "be";
6108 break;
6109 case UNORDERED:
6110 suffix = fp ? "u" : "p";
6111 break;
6112 case ORDERED:
6113 suffix = fp ? "nu" : "np";
6114 break;
6115 default:
6116 abort ();
6117 }
6118 fputs (suffix, file);
6119 }
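
/* For instance, (eq ...) yields the suffix "e", so a conditional set
   becomes `sete' and a conditional move `cmove'; (gtu ...) on an FP
   condition yields "nbe" (see the lossage note above), giving `fcmovnbe'.
   With REVERSE set, EQ prints "ne" instead.  */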
6120
6121 /* Print the name of register X to FILE based on its machine mode and number.
6122 If CODE is 'w', pretend the mode is HImode.
6123 If CODE is 'b', pretend the mode is QImode.
6124 If CODE is 'k', pretend the mode is SImode.
6125 If CODE is 'q', pretend the mode is DImode.
6126 If CODE is 'h', pretend the reg is the `high' byte register.
6127 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6128
6129 void
6130 print_reg (rtx x, int code, FILE *file)
6131 {
6132 if (REGNO (x) == ARG_POINTER_REGNUM
6133 || REGNO (x) == FRAME_POINTER_REGNUM
6134 || REGNO (x) == FLAGS_REG
6135 || REGNO (x) == FPSR_REG)
6136 abort ();
6137
6138 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6139 putc ('%', file);
6140
6141 if (code == 'w' || MMX_REG_P (x))
6142 code = 2;
6143 else if (code == 'b')
6144 code = 1;
6145 else if (code == 'k')
6146 code = 4;
6147 else if (code == 'q')
6148 code = 8;
6149 else if (code == 'y')
6150 code = 3;
6151 else if (code == 'h')
6152 code = 0;
6153 else
6154 code = GET_MODE_SIZE (GET_MODE (x));
6155
6156 /* Irritatingly, AMD extended registers use a different naming convention
6157 from the normal registers. */
6158 if (REX_INT_REG_P (x))
6159 {
6160 if (!TARGET_64BIT)
6161 abort ();
6162 switch (code)
6163 {
6164 case 0:
6165 error ("extended registers have no high halves");
6166 break;
6167 case 1:
6168 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6169 break;
6170 case 2:
6171 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6172 break;
6173 case 4:
6174 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6175 break;
6176 case 8:
6177 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6178 break;
6179 default:
6180 error ("unsupported operand size for extended register");
6181 break;
6182 }
6183 return;
6184 }
6185 switch (code)
6186 {
6187 case 3:
6188 if (STACK_TOP_P (x))
6189 {
6190 fputs ("st(0)", file);
6191 break;
6192 }
6193 /* FALLTHRU */
6194 case 8:
6195 case 4:
6196 case 12:
6197 if (! ANY_FP_REG_P (x))
6198 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6199 /* FALLTHRU */
6200 case 16:
6201 case 2:
6202 normal:
6203 fputs (hi_reg_name[REGNO (x)], file);
6204 break;
6205 case 1:
6206 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6207 goto normal;
6208 fputs (qi_reg_name[REGNO (x)], file);
6209 break;
6210 case 0:
6211 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6212 goto normal;
6213 fputs (qi_high_reg_name[REGNO (x)], file);
6214 break;
6215 default:
6216 abort ();
6217 }
6218 }
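
/* A few concrete cases, assuming AT&T output: register ax prints as %eax
   with code 'k', %ax with 'w', %al with 'b', %ah with 'h' and %rax with
   'q' (64-bit only); the first AMD extended register prints as %r8b,
   %r8w, %r8d or %r8 depending on the requested width.  */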
6219
6220 /* Locate some local-dynamic symbol still in use by this function
6221 so that we can print its name in some tls_local_dynamic_base
6222 pattern. */
6223
6224 static const char *
6225 get_some_local_dynamic_name (void)
6226 {
6227 rtx insn;
6228
6229 if (cfun->machine->some_ld_name)
6230 return cfun->machine->some_ld_name;
6231
6232 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6233 if (INSN_P (insn)
6234 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6235 return cfun->machine->some_ld_name;
6236
6237 abort ();
6238 }
6239
6240 static int
6241 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6242 {
6243 rtx x = *px;
6244
6245 if (GET_CODE (x) == SYMBOL_REF
6246 && local_dynamic_symbolic_operand (x, Pmode))
6247 {
6248 cfun->machine->some_ld_name = XSTR (x, 0);
6249 return 1;
6250 }
6251
6252 return 0;
6253 }
6254
6255 /* Meaning of CODE:
6256 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6257 C -- print opcode suffix for set/cmov insn.
6258 c -- like C, but print reversed condition
6259 F,f -- likewise, but for floating-point.
6260 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6261 otherwise nothing
6262 R -- print the prefix for register names.
6263 z -- print the opcode suffix for the size of the current operand.
6264 * -- print a star (in certain assembler syntax)
6265 A -- print an absolute memory reference.
6266 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6267 s -- print a shift double count, followed by the assembler's argument
6268 delimiter.
6269 b -- print the QImode name of the register for the indicated operand.
6270 %b0 would print %al if operands[0] is reg 0.
6271 w -- likewise, print the HImode name of the register.
6272 k -- likewise, print the SImode name of the register.
6273 q -- likewise, print the DImode name of the register.
6274 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6275 y -- print "st(0)" instead of "st" as a register.
6276 D -- print condition for SSE cmp instruction.
6277 P -- if PIC, print an @PLT suffix.
6278 X -- don't print any sort of PIC '@' suffix for a symbol.
6279 & -- print some in-use local-dynamic symbol name.
6280 */
6281
6282 void
6283 print_operand (FILE *file, rtx x, int code)
6284 {
6285 if (code)
6286 {
6287 switch (code)
6288 {
6289 case '*':
6290 if (ASSEMBLER_DIALECT == ASM_ATT)
6291 putc ('*', file);
6292 return;
6293
6294 case '&':
6295 assemble_name (file, get_some_local_dynamic_name ());
6296 return;
6297
6298 case 'A':
6299 if (ASSEMBLER_DIALECT == ASM_ATT)
6300 putc ('*', file);
6301 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6302 {
6303 /* Intel syntax. For absolute addresses, registers should not
6304 be surrounded by brackets. */
6305 if (GET_CODE (x) != REG)
6306 {
6307 putc ('[', file);
6308 PRINT_OPERAND (file, x, 0);
6309 putc (']', file);
6310 return;
6311 }
6312 }
6313 else
6314 abort ();
6315
6316 PRINT_OPERAND (file, x, 0);
6317 return;
6318
6319
6320 case 'L':
6321 if (ASSEMBLER_DIALECT == ASM_ATT)
6322 putc ('l', file);
6323 return;
6324
6325 case 'W':
6326 if (ASSEMBLER_DIALECT == ASM_ATT)
6327 putc ('w', file);
6328 return;
6329
6330 case 'B':
6331 if (ASSEMBLER_DIALECT == ASM_ATT)
6332 putc ('b', file);
6333 return;
6334
6335 case 'Q':
6336 if (ASSEMBLER_DIALECT == ASM_ATT)
6337 putc ('l', file);
6338 return;
6339
6340 case 'S':
6341 if (ASSEMBLER_DIALECT == ASM_ATT)
6342 putc ('s', file);
6343 return;
6344
6345 case 'T':
6346 if (ASSEMBLER_DIALECT == ASM_ATT)
6347 putc ('t', file);
6348 return;
6349
6350 case 'z':
6351 /* 387 opcodes don't get size suffixes if the operands are
6352 registers. */
6353 if (STACK_REG_P (x))
6354 return;
6355
6356 /* Likewise if using Intel opcodes. */
6357 if (ASSEMBLER_DIALECT == ASM_INTEL)
6358 return;
6359
6360 /* Derive the opcode size suffix from the size of the operand. */
6361 switch (GET_MODE_SIZE (GET_MODE (x)))
6362 {
6363 case 2:
6364 #ifdef HAVE_GAS_FILDS_FISTS
6365 putc ('s', file);
6366 #endif
6367 return;
6368
6369 case 4:
6370 if (GET_MODE (x) == SFmode)
6371 {
6372 putc ('s', file);
6373 return;
6374 }
6375 else
6376 putc ('l', file);
6377 return;
6378
6379 case 12:
6380 case 16:
6381 putc ('t', file);
6382 return;
6383
6384 case 8:
6385 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6386 {
6387 #ifdef GAS_MNEMONICS
6388 putc ('q', file);
6389 #else
6390 putc ('l', file);
6391 putc ('l', file);
6392 #endif
6393 }
6394 else
6395 putc ('l', file);
6396 return;
6397
6398 default:
6399 abort ();
6400 }
6401
6402 case 'b':
6403 case 'w':
6404 case 'k':
6405 case 'q':
6406 case 'h':
6407 case 'y':
6408 case 'X':
6409 case 'P':
6410 break;
6411
6412 case 's':
6413 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6414 {
6415 PRINT_OPERAND (file, x, 0);
6416 putc (',', file);
6417 }
6418 return;
6419
6420 case 'D':
6421 /* A little bit of brain damage here. The SSE compare instructions
6422 use completely different names for the comparisons than the
6423 fp conditional moves do. */
6424 switch (GET_CODE (x))
6425 {
6426 case EQ:
6427 case UNEQ:
6428 fputs ("eq", file);
6429 break;
6430 case LT:
6431 case UNLT:
6432 fputs ("lt", file);
6433 break;
6434 case LE:
6435 case UNLE:
6436 fputs ("le", file);
6437 break;
6438 case UNORDERED:
6439 fputs ("unord", file);
6440 break;
6441 case NE:
6442 case LTGT:
6443 fputs ("neq", file);
6444 break;
6445 case UNGE:
6446 case GE:
6447 fputs ("nlt", file);
6448 break;
6449 case UNGT:
6450 case GT:
6451 fputs ("nle", file);
6452 break;
6453 case ORDERED:
6454 fputs ("ord", file);
6455 break;
6456 default:
6457 abort ();
6458 break;
6459 }
6460 return;
6461 case 'O':
6462 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6463 if (ASSEMBLER_DIALECT == ASM_ATT)
6464 {
6465 switch (GET_MODE (x))
6466 {
6467 case HImode: putc ('w', file); break;
6468 case SImode:
6469 case SFmode: putc ('l', file); break;
6470 case DImode:
6471 case DFmode: putc ('q', file); break;
6472 default: abort ();
6473 }
6474 putc ('.', file);
6475 }
6476 #endif
6477 return;
6478 case 'C':
6479 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6480 return;
6481 case 'F':
6482 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6483 if (ASSEMBLER_DIALECT == ASM_ATT)
6484 putc ('.', file);
6485 #endif
6486 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6487 return;
6488
6489 /* Like above, but reverse condition */
6490 case 'c':
6491 /* Check to see if argument to %c is really a constant
6492 and not a condition code which needs to be reversed. */
6493 if (!COMPARISON_P (x))
6494 {
6495 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6496 return;
6497 }
6498 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6499 return;
6500 case 'f':
6501 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6502 if (ASSEMBLER_DIALECT == ASM_ATT)
6503 putc ('.', file);
6504 #endif
6505 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6506 return;
6507 case '+':
6508 {
6509 rtx x;
6510
6511 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6512 return;
6513
6514 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6515 if (x)
6516 {
6517 int pred_val = INTVAL (XEXP (x, 0));
6518
6519 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6520 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6521 {
6522 int taken = pred_val > REG_BR_PROB_BASE / 2;
6523 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6524
6525 /* Emit hints only when the default branch prediction
6526 heuristics would fail. */
6527 if (taken != cputaken)
6528 {
6529 /* We use 3e (DS) prefix for taken branches and
6530 2e (CS) prefix for not taken branches. */
6531 if (taken)
6532 fputs ("ds ; ", file);
6533 else
6534 fputs ("cs ; ", file);
6535 }
6536 }
6537 }
6538 return;
6539 }
6540 default:
6541 output_operand_lossage ("invalid operand code '%c'", code);
6542 }
6543 }
6544
6545 if (GET_CODE (x) == REG)
6546 print_reg (x, code, file);
6547
6548 else if (GET_CODE (x) == MEM)
6549 {
6550 /* No `byte ptr' prefix for call instructions. */
6551 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6552 {
6553 const char * size;
6554 switch (GET_MODE_SIZE (GET_MODE (x)))
6555 {
6556 case 1: size = "BYTE"; break;
6557 case 2: size = "WORD"; break;
6558 case 4: size = "DWORD"; break;
6559 case 8: size = "QWORD"; break;
6560 case 12: size = "XWORD"; break;
6561 case 16: size = "XMMWORD"; break;
6562 default:
6563 abort ();
6564 }
6565
6566 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6567 if (code == 'b')
6568 size = "BYTE";
6569 else if (code == 'w')
6570 size = "WORD";
6571 else if (code == 'k')
6572 size = "DWORD";
6573
6574 fputs (size, file);
6575 fputs (" PTR ", file);
6576 }
6577
6578 x = XEXP (x, 0);
6579 /* Avoid (%rip) for call operands. */
6580 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6581 && GET_CODE (x) != CONST_INT)
6582 output_addr_const (file, x);
6583 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6584 output_operand_lossage ("invalid constraints for operand");
6585 else
6586 output_address (x);
6587 }
6588
6589 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6590 {
6591 REAL_VALUE_TYPE r;
6592 long l;
6593
6594 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6595 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6596
6597 if (ASSEMBLER_DIALECT == ASM_ATT)
6598 putc ('$', file);
6599 fprintf (file, "0x%08lx", l);
6600 }
6601
6602 /* These float cases don't actually occur as immediate operands. */
6603 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6604 {
6605 char dstr[30];
6606
6607 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6608 fprintf (file, "%s", dstr);
6609 }
6610
6611 else if (GET_CODE (x) == CONST_DOUBLE
6612 && GET_MODE (x) == XFmode)
6613 {
6614 char dstr[30];
6615
6616 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6617 fprintf (file, "%s", dstr);
6618 }
6619
6620 else
6621 {
6622 if (code != 'P')
6623 {
6624 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6625 {
6626 if (ASSEMBLER_DIALECT == ASM_ATT)
6627 putc ('$', file);
6628 }
6629 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6630 || GET_CODE (x) == LABEL_REF)
6631 {
6632 if (ASSEMBLER_DIALECT == ASM_ATT)
6633 putc ('$', file);
6634 else
6635 fputs ("OFFSET FLAT:", file);
6636 }
6637 }
6638 if (GET_CODE (x) == CONST_INT)
6639 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6640 else if (flag_pic)
6641 output_pic_addr_const (file, x, code);
6642 else
6643 output_addr_const (file, x);
6644 }
6645 }
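
/* To illustrate the operand codes from the table above: with operands[0]
   a DImode general register, "%k0" prints its SImode name, "%w0" the
   HImode name and "%b0" the QImode name; "%z1" applied to an SFmode
   memory operand emits the `s' size suffix (giving e.g. flds); and in
   Intel mode a MEM operand is preceded by a size override such as
   "DWORD PTR ".  */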
6646 \f
6647 /* Print a memory operand whose address is ADDR. */
6648
6649 void
6650 print_operand_address (FILE *file, rtx addr)
6651 {
6652 struct ix86_address parts;
6653 rtx base, index, disp;
6654 int scale;
6655
6656 if (! ix86_decompose_address (addr, &parts))
6657 abort ();
6658
6659 base = parts.base;
6660 index = parts.index;
6661 disp = parts.disp;
6662 scale = parts.scale;
6663
6664 switch (parts.seg)
6665 {
6666 case SEG_DEFAULT:
6667 break;
6668 case SEG_FS:
6669 case SEG_GS:
6670 if (USER_LABEL_PREFIX[0] == 0)
6671 putc ('%', file);
6672 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6673 break;
6674 default:
6675 abort ();
6676 }
6677
6678 if (!base && !index)
6679 {
6680 /* A displacement-only address requires special attention. */
6681
6682 if (GET_CODE (disp) == CONST_INT)
6683 {
6684 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6685 {
6686 if (USER_LABEL_PREFIX[0] == 0)
6687 putc ('%', file);
6688 fputs ("ds:", file);
6689 }
6690 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6691 }
6692 else if (flag_pic)
6693 output_pic_addr_const (file, disp, 0);
6694 else
6695 output_addr_const (file, disp);
6696
6697 /* Use the one byte shorter RIP relative addressing for 64-bit mode. */
6698 if (TARGET_64BIT
6699 && ((GET_CODE (disp) == SYMBOL_REF
6700 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6701 || GET_CODE (disp) == LABEL_REF
6702 || (GET_CODE (disp) == CONST
6703 && GET_CODE (XEXP (disp, 0)) == PLUS
6704 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6705 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6706 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6707 fputs ("(%rip)", file);
6708 }
6709 else
6710 {
6711 if (ASSEMBLER_DIALECT == ASM_ATT)
6712 {
6713 if (disp)
6714 {
6715 if (flag_pic)
6716 output_pic_addr_const (file, disp, 0);
6717 else if (GET_CODE (disp) == LABEL_REF)
6718 output_asm_label (disp);
6719 else
6720 output_addr_const (file, disp);
6721 }
6722
6723 putc ('(', file);
6724 if (base)
6725 print_reg (base, 0, file);
6726 if (index)
6727 {
6728 putc (',', file);
6729 print_reg (index, 0, file);
6730 if (scale != 1)
6731 fprintf (file, ",%d", scale);
6732 }
6733 putc (')', file);
6734 }
6735 else
6736 {
6737 rtx offset = NULL_RTX;
6738
6739 if (disp)
6740 {
6741 /* Pull out the offset of a symbol; print any symbol itself. */
6742 if (GET_CODE (disp) == CONST
6743 && GET_CODE (XEXP (disp, 0)) == PLUS
6744 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6745 {
6746 offset = XEXP (XEXP (disp, 0), 1);
6747 disp = gen_rtx_CONST (VOIDmode,
6748 XEXP (XEXP (disp, 0), 0));
6749 }
6750
6751 if (flag_pic)
6752 output_pic_addr_const (file, disp, 0);
6753 else if (GET_CODE (disp) == LABEL_REF)
6754 output_asm_label (disp);
6755 else if (GET_CODE (disp) == CONST_INT)
6756 offset = disp;
6757 else
6758 output_addr_const (file, disp);
6759 }
6760
6761 putc ('[', file);
6762 if (base)
6763 {
6764 print_reg (base, 0, file);
6765 if (offset)
6766 {
6767 if (INTVAL (offset) >= 0)
6768 putc ('+', file);
6769 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6770 }
6771 }
6772 else if (offset)
6773 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6774 else
6775 putc ('0', file);
6776
6777 if (index)
6778 {
6779 putc ('+', file);
6780 print_reg (index, 0, file);
6781 if (scale != 1)
6782 fprintf (file, "*%d", scale);
6783 }
6784 putc (']', file);
6785 }
6786 }
6787 }
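
/* The same address therefore prints differently per dialect: with base
   ebx, index eax, scale 4 and displacement 16, the AT&T branch above
   emits "16(%ebx,%eax,4)" while the Intel branch emits roughly
   "[ebx+16+eax*4]" (register prefixes depend on USER_LABEL_PREFIX).
   A bare constant address gets a "ds:" override in Intel mode, and a
   bare symbolic address gets "(%rip)" appended in 64-bit mode.  */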
6788
6789 bool
6790 output_addr_const_extra (FILE *file, rtx x)
6791 {
6792 rtx op;
6793
6794 if (GET_CODE (x) != UNSPEC)
6795 return false;
6796
6797 op = XVECEXP (x, 0, 0);
6798 switch (XINT (x, 1))
6799 {
6800 case UNSPEC_GOTTPOFF:
6801 output_addr_const (file, op);
6802 /* FIXME: This might be @TPOFF in Sun ld. */
6803 fputs ("@GOTTPOFF", file);
6804 break;
6805 case UNSPEC_TPOFF:
6806 output_addr_const (file, op);
6807 fputs ("@TPOFF", file);
6808 break;
6809 case UNSPEC_NTPOFF:
6810 output_addr_const (file, op);
6811 if (TARGET_64BIT)
6812 fputs ("@TPOFF", file);
6813 else
6814 fputs ("@NTPOFF", file);
6815 break;
6816 case UNSPEC_DTPOFF:
6817 output_addr_const (file, op);
6818 fputs ("@DTPOFF", file);
6819 break;
6820 case UNSPEC_GOTNTPOFF:
6821 output_addr_const (file, op);
6822 if (TARGET_64BIT)
6823 fputs ("@GOTTPOFF(%rip)", file);
6824 else
6825 fputs ("@GOTNTPOFF", file);
6826 break;
6827 case UNSPEC_INDNTPOFF:
6828 output_addr_const (file, op);
6829 fputs ("@INDNTPOFF", file);
6830 break;
6831
6832 default:
6833 return false;
6834 }
6835
6836 return true;
6837 }
6838 \f
6839 /* Split one or more DImode RTL references into pairs of SImode
6840 references. The RTL can be REG, offsettable MEM, integer constant, or
6841 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6842 split and "num" is its length. lo_half and hi_half are output arrays
6843 that parallel "operands". */
6844
6845 void
6846 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6847 {
6848 while (num--)
6849 {
6850 rtx op = operands[num];
6851
6852 /* simplify_subreg refuses to split volatile memory addresses,
6853 but we still have to handle them. */
6854 if (GET_CODE (op) == MEM)
6855 {
6856 lo_half[num] = adjust_address (op, SImode, 0);
6857 hi_half[num] = adjust_address (op, SImode, 4);
6858 }
6859 else
6860 {
6861 lo_half[num] = simplify_gen_subreg (SImode, op,
6862 GET_MODE (op) == VOIDmode
6863 ? DImode : GET_MODE (op), 0);
6864 hi_half[num] = simplify_gen_subreg (SImode, op,
6865 GET_MODE (op) == VOIDmode
6866 ? DImode : GET_MODE (op), 4);
6867 }
6868 }
6869 }
6870 /* Split one or more TImode RTL references into pairs of DImode
6871 references. The RTL can be REG, offsettable MEM, integer constant, or
6872 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6873 split and "num" is its length. lo_half and hi_half are output arrays
6874 that parallel "operands". */
6875
6876 void
6877 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6878 {
6879 while (num--)
6880 {
6881 rtx op = operands[num];
6882
6883 /* simplify_subreg refuses to split volatile memory addresses, but we
6884 still have to handle them. */
6885 if (GET_CODE (op) == MEM)
6886 {
6887 lo_half[num] = adjust_address (op, DImode, 0);
6888 hi_half[num] = adjust_address (op, DImode, 8);
6889 }
6890 else
6891 {
6892 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6893 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6894 }
6895 }
6896 }
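
/* For example, split_di turns a DImode MEM at address X into SImode MEMs
   at X and X+4, and the constant 0x200000001 into the pair lo = 1,
   hi = 2 (the low-order half always comes first).  split_ti does the
   same at DImode granularity, using an offset of 8 for the high half.  */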
6897 \f
6898 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6899 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6900 is the expression of the binary operation. The output may either be
6901 emitted here, or returned to the caller, like all output_* functions.
6902
6903 There is no guarantee that the operands are the same mode, as they
6904 might be within FLOAT or FLOAT_EXTEND expressions. */
6905
6906 #ifndef SYSV386_COMPAT
6907 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6908 wants to fix the assemblers because that causes incompatibility
6909 with gcc. No-one wants to fix gcc because that causes
6910 incompatibility with assemblers... You can use the option of
6911 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6912 #define SYSV386_COMPAT 1
6913 #endif
6914
6915 const char *
6916 output_387_binary_op (rtx insn, rtx *operands)
6917 {
6918 static char buf[30];
6919 const char *p;
6920 const char *ssep;
6921 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6922
6923 #ifdef ENABLE_CHECKING
6924 /* Even if we do not want to check the inputs, this documents the input
6925 constraints, which helps in understanding the following code. */
6926 if (STACK_REG_P (operands[0])
6927 && ((REG_P (operands[1])
6928 && REGNO (operands[0]) == REGNO (operands[1])
6929 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6930 || (REG_P (operands[2])
6931 && REGNO (operands[0]) == REGNO (operands[2])
6932 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6933 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6934 ; /* ok */
6935 else if (!is_sse)
6936 abort ();
6937 #endif
6938
6939 switch (GET_CODE (operands[3]))
6940 {
6941 case PLUS:
6942 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6943 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6944 p = "fiadd";
6945 else
6946 p = "fadd";
6947 ssep = "add";
6948 break;
6949
6950 case MINUS:
6951 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6952 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6953 p = "fisub";
6954 else
6955 p = "fsub";
6956 ssep = "sub";
6957 break;
6958
6959 case MULT:
6960 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6961 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6962 p = "fimul";
6963 else
6964 p = "fmul";
6965 ssep = "mul";
6966 break;
6967
6968 case DIV:
6969 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6970 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6971 p = "fidiv";
6972 else
6973 p = "fdiv";
6974 ssep = "div";
6975 break;
6976
6977 default:
6978 abort ();
6979 }
6980
6981 if (is_sse)
6982 {
6983 strcpy (buf, ssep);
6984 if (GET_MODE (operands[0]) == SFmode)
6985 strcat (buf, "ss\t{%2, %0|%0, %2}");
6986 else
6987 strcat (buf, "sd\t{%2, %0|%0, %2}");
6988 return buf;
6989 }
6990 strcpy (buf, p);
6991
6992 switch (GET_CODE (operands[3]))
6993 {
6994 case MULT:
6995 case PLUS:
6996 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6997 {
6998 rtx temp = operands[2];
6999 operands[2] = operands[1];
7000 operands[1] = temp;
7001 }
7002
7003 /* We know operands[0] == operands[1]. */
7004
7005 if (GET_CODE (operands[2]) == MEM)
7006 {
7007 p = "%z2\t%2";
7008 break;
7009 }
7010
7011 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7012 {
7013 if (STACK_TOP_P (operands[0]))
7014 /* How is it that we are storing to a dead operand[2]?
7015 Well, presumably operands[1] is dead too. We can't
7016 store the result to st(0) as st(0) gets popped on this
7017 instruction. Instead store to operands[2] (which I
7018 think has to be st(1)). st(1) will be popped later.
7019 gcc <= 2.8.1 didn't have this check and generated
7020 assembly code that the Unixware assembler rejected. */
7021 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7022 else
7023 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7024 break;
7025 }
7026
7027 if (STACK_TOP_P (operands[0]))
7028 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7029 else
7030 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7031 break;
7032
7033 case MINUS:
7034 case DIV:
7035 if (GET_CODE (operands[1]) == MEM)
7036 {
7037 p = "r%z1\t%1";
7038 break;
7039 }
7040
7041 if (GET_CODE (operands[2]) == MEM)
7042 {
7043 p = "%z2\t%2";
7044 break;
7045 }
7046
7047 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7048 {
7049 #if SYSV386_COMPAT
7050 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7051 derived assemblers, confusingly reverse the direction of
7052 the operation for fsub{r} and fdiv{r} when the
7053 destination register is not st(0). The Intel assembler
7054 doesn't have this brain damage. Read !SYSV386_COMPAT to
7055 figure out what the hardware really does. */
7056 if (STACK_TOP_P (operands[0]))
7057 p = "{p\t%0, %2|rp\t%2, %0}";
7058 else
7059 p = "{rp\t%2, %0|p\t%0, %2}";
7060 #else
7061 if (STACK_TOP_P (operands[0]))
7062 /* As above for fmul/fadd, we can't store to st(0). */
7063 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7064 else
7065 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7066 #endif
7067 break;
7068 }
7069
7070 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7071 {
7072 #if SYSV386_COMPAT
7073 if (STACK_TOP_P (operands[0]))
7074 p = "{rp\t%0, %1|p\t%1, %0}";
7075 else
7076 p = "{p\t%1, %0|rp\t%0, %1}";
7077 #else
7078 if (STACK_TOP_P (operands[0]))
7079 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7080 else
7081 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7082 #endif
7083 break;
7084 }
7085
7086 if (STACK_TOP_P (operands[0]))
7087 {
7088 if (STACK_TOP_P (operands[1]))
7089 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7090 else
7091 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7092 break;
7093 }
7094 else if (STACK_TOP_P (operands[1]))
7095 {
7096 #if SYSV386_COMPAT
7097 p = "{\t%1, %0|r\t%0, %1}";
7098 #else
7099 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7100 #endif
7101 }
7102 else
7103 {
7104 #if SYSV386_COMPAT
7105 p = "{r\t%2, %0|\t%0, %2}";
7106 #else
7107 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7108 #endif
7109 }
7110 break;
7111
7112 default:
7113 abort ();
7114 }
7115
7116 strcat (buf, p);
7117 return buf;
7118 }
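
/* For instance, an SFmode SSE addition returns
   "addss\t{%2, %0|%0, %2}" (the {att|intel} alternatives are resolved by
   the normal output machinery), while a 387 addition whose second operand
   is in memory returns "fadd%z2\t%2", i.e. fadds or faddl depending on
   the operand size.  */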
7119
7120 /* Output code to initialize control word copies used by trunc?f?i and
7121 rounding patterns. CURRENT_MODE is set to the current control word,
7122 while NEW_MODE is set to the new control word. */
7123
7124 void
7125 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7126 {
7127 rtx reg = gen_reg_rtx (HImode);
7128
7129 emit_insn (gen_x86_fnstcw_1 (current_mode));
7130 emit_move_insn (reg, current_mode);
7131
7132 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7133 && !TARGET_64BIT)
7134 {
7135 switch (mode)
7136 {
7137 case I387_CW_FLOOR:
7138 /* round down toward -oo */
7139 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7140 break;
7141
7142 case I387_CW_CEIL:
7143 /* round up toward +oo */
7144 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7145 break;
7146
7147 case I387_CW_TRUNC:
7148 /* round toward zero (truncate) */
7149 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7150 break;
7151
7152 case I387_CW_MASK_PM:
7153 /* mask precision exception for nearbyint() */
7154 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7155 break;
7156
7157 default:
7158 abort();
7159 }
7160 }
7161 else
7162 {
7163 switch (mode)
7164 {
7165 case I387_CW_FLOOR:
7166 /* round down toward -oo */
7167 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7168 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7169 break;
7170
7171 case I387_CW_CEIL:
7172 /* round up toward +oo */
7173 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7174 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7175 break;
7176
7177 case I387_CW_TRUNC:
7178 /* round toward zero (truncate) */
7179 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7180 break;
7181
7182 case I387_CW_MASK_PM:
7183 /* mask precision exception for nearbyint() */
7184 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7185 break;
7186
7187 default:
7188 abort();
7189 }
7190 }
7191
7192 emit_move_insn (new_mode, reg);
7193 }
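
/* The magic constants above are the x87 control word fields: bits 10-11
   are the rounding control (0x0400 rounds down, 0x0800 rounds up, 0x0c00
   truncates, and both bits clear rounds to nearest), and bit 5 (0x0020)
   masks the precision exception, which is what nearbyint() needs.  The
   movsi_insv_1 path stores the same values through the high byte of the
   word, hence the shifted-down constants 0x4, 0x8 and 0xc.  */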
7194
7195 /* Output code for INSN to convert a float to a signed int. OPERANDS
7196 are the insn operands. The output may be [HSD]Imode and the input
7197 operand may be [SDX]Fmode. */
7198
7199 const char *
7200 output_fix_trunc (rtx insn, rtx *operands)
7201 {
7202 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7203 int dimode_p = GET_MODE (operands[0]) == DImode;
7204
7205 /* Jump through a hoop or two for DImode, since the hardware has no
7206 non-popping instruction. We used to do this a different way, but
7207 that was somewhat fragile and broke with post-reload splitters. */
7208 if (dimode_p && !stack_top_dies)
7209 output_asm_insn ("fld\t%y1", operands);
7210
7211 if (!STACK_TOP_P (operands[1]))
7212 abort ();
7213
7214 if (GET_CODE (operands[0]) != MEM)
7215 abort ();
7216
7217 output_asm_insn ("fldcw\t%3", operands);
7218 if (stack_top_dies || dimode_p)
7219 output_asm_insn ("fistp%z0\t%0", operands);
7220 else
7221 output_asm_insn ("fist%z0\t%0", operands);
7222 output_asm_insn ("fldcw\t%2", operands);
7223
7224 return "";
7225 }
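
/* The emitted sequence therefore has the shape

	fldcw	%3		; switch the 387 to truncating rounding
	fistp%z0	%0	; store (fist%z0 if the value stays live)
	fldcw	%2		; restore the original control word

   where the control word copies in operands[2] and operands[3] are
   presumably the ones prepared by emit_i387_cw_initialization.  */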
7226
7227 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7228 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7229 when fucom should be used. */
7230
7231 const char *
7232 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7233 {
7234 int stack_top_dies;
7235 rtx cmp_op0, cmp_op1;
7236 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7237
7238 if (eflags_p == 2)
7239 {
7240 cmp_op0 = operands[1];
7241 cmp_op1 = operands[2];
7242 }
7243 else
7244 {
7245 cmp_op0 = operands[0];
7246 cmp_op1 = operands[1];
7247 }
7248
7249 if (is_sse)
7250 {
7251 if (GET_MODE (operands[0]) == SFmode)
7252 if (unordered_p)
7253 return "ucomiss\t{%1, %0|%0, %1}";
7254 else
7255 return "comiss\t{%1, %0|%0, %1}";
7256 else
7257 if (unordered_p)
7258 return "ucomisd\t{%1, %0|%0, %1}";
7259 else
7260 return "comisd\t{%1, %0|%0, %1}";
7261 }
7262
7263 if (! STACK_TOP_P (cmp_op0))
7264 abort ();
7265
7266 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7267
7268 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7269 {
7270 if (stack_top_dies)
7271 {
7272 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7273 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7274 }
7275 else
7276 return "ftst\n\tfnstsw\t%0";
7277 }
7278
7279 if (STACK_REG_P (cmp_op1)
7280 && stack_top_dies
7281 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7282 && REGNO (cmp_op1) != FIRST_STACK_REG)
7283 {
7284 /* If the top of the 387 stack dies, and the other operand
7285 is also a stack register that dies, then this must be a
7286 `fcompp' float compare. */
7287
7288 if (eflags_p == 1)
7289 {
7290 /* There is no double popping fcomi variant. Fortunately,
7291 eflags is immune from the fstp's cc clobbering. */
7292 if (unordered_p)
7293 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7294 else
7295 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7296 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7297 }
7298 else
7299 {
7300 if (eflags_p == 2)
7301 {
7302 if (unordered_p)
7303 return "fucompp\n\tfnstsw\t%0";
7304 else
7305 return "fcompp\n\tfnstsw\t%0";
7306 }
7307 else
7308 {
7309 if (unordered_p)
7310 return "fucompp";
7311 else
7312 return "fcompp";
7313 }
7314 }
7315 }
7316 else
7317 {
7318 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7319
7320 static const char * const alt[24] =
7321 {
7322 "fcom%z1\t%y1",
7323 "fcomp%z1\t%y1",
7324 "fucom%z1\t%y1",
7325 "fucomp%z1\t%y1",
7326
7327 "ficom%z1\t%y1",
7328 "ficomp%z1\t%y1",
7329 NULL,
7330 NULL,
7331
7332 "fcomi\t{%y1, %0|%0, %y1}",
7333 "fcomip\t{%y1, %0|%0, %y1}",
7334 "fucomi\t{%y1, %0|%0, %y1}",
7335 "fucomip\t{%y1, %0|%0, %y1}",
7336
7337 NULL,
7338 NULL,
7339 NULL,
7340 NULL,
7341
7342 "fcom%z2\t%y2\n\tfnstsw\t%0",
7343 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7344 "fucom%z2\t%y2\n\tfnstsw\t%0",
7345 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7346
7347 "ficom%z2\t%y2\n\tfnstsw\t%0",
7348 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7349 NULL,
7350 NULL
7351 };
7352
7353 int mask;
7354 const char *ret;
7355
7356 mask = eflags_p << 3;
7357 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7358 mask |= unordered_p << 1;
7359 mask |= stack_top_dies;
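/* Worked example of the encoding above: eflags_p == 1 (fcomi), an FP
   operand, an ordered compare, and a dying stack top give
   mask = (1 << 3) | 0 | 0 | 1 = 9, i.e. alt[9] == "fcomip".
   Similarly eflags_p == 2 with an unordered compare and a dying top
   gives mask = 16 + 2 + 1 = 19, the popping fucomp/fnstsw form.  */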
7360
7361 if (mask >= 24)
7362 abort ();
7363 ret = alt[mask];
7364 if (ret == NULL)
7365 abort ();
7366
7367 return ret;
7368 }
7369 }
7370
7371 void
7372 ix86_output_addr_vec_elt (FILE *file, int value)
7373 {
7374 const char *directive = ASM_LONG;
7375
7376 if (TARGET_64BIT)
7377 {
7378 #ifdef ASM_QUAD
7379 directive = ASM_QUAD;
7380 #else
7381 abort ();
7382 #endif
7383 }
7384
7385 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7386 }
7387
7388 void
7389 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7390 {
7391 if (TARGET_64BIT)
7392 fprintf (file, "%s%s%d-%s%d\n",
7393 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7394 else if (HAVE_AS_GOTOFF_IN_DATA)
7395 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7396 #if TARGET_MACHO
7397 else if (TARGET_MACHO)
7398 {
7399 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7400 machopic_output_function_base_name (file);
7401 fprintf (file, "\n");
7402 }
7403 #endif
7404 else
7405 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7406 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7407 }
7408 \f
7409 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7410 for the target. */
7411
7412 void
7413 ix86_expand_clear (rtx dest)
7414 {
7415 rtx tmp;
7416
7417 /* We play register width games, which are only valid after reload. */
7418 if (!reload_completed)
7419 abort ();
7420
7421 /* Avoid HImode and its attendant prefix byte. */
7422 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7423 dest = gen_rtx_REG (SImode, REGNO (dest));
7424
7425 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7426
7427 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7428 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7429 {
7430 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7431 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7432 }
7433
7434 emit_insn (tmp);
7435 }
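
/* Illustrative difference between the two forms chosen above (sizes
   are for the 32-bit encodings):

     xorl %eax, %eax     ; 2 bytes, but clobbers the flags
     movl $0, %eax       ; 5 bytes, flags preserved

   which is why the xor form carries an explicit CLOBBER of the flags
   register above, and why mov is used when TARGET_USE_MOV0 is set and
   we are not optimizing for size.  */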
7436
7437 /* X is an unchanging MEM. If it is a constant pool reference, return
7438 the constant pool rtx, else NULL. */
7439
7440 rtx
7441 maybe_get_pool_constant (rtx x)
7442 {
7443 x = ix86_delegitimize_address (XEXP (x, 0));
7444
7445 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7446 return get_pool_constant (x);
7447
7448 return NULL_RTX;
7449 }
7450
7451 void
7452 ix86_expand_move (enum machine_mode mode, rtx operands[])
7453 {
7454 int strict = (reload_in_progress || reload_completed);
7455 rtx op0, op1;
7456 enum tls_model model;
7457
7458 op0 = operands[0];
7459 op1 = operands[1];
7460
7461 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7462 if (model)
7463 {
7464 op1 = legitimize_tls_address (op1, model, true);
7465 op1 = force_operand (op1, op0);
7466 if (op1 == op0)
7467 return;
7468 }
7469
7470 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7471 {
7472 #if TARGET_MACHO
7473 if (MACHOPIC_PURE)
7474 {
7475 rtx temp = ((reload_in_progress
7476 || ((op0 && GET_CODE (op0) == REG)
7477 && mode == Pmode))
7478 ? op0 : gen_reg_rtx (Pmode));
7479 op1 = machopic_indirect_data_reference (op1, temp);
7480 op1 = machopic_legitimize_pic_address (op1, mode,
7481 temp == op1 ? 0 : temp);
7482 }
7483 else if (MACHOPIC_INDIRECT)
7484 op1 = machopic_indirect_data_reference (op1, 0);
7485 if (op0 == op1)
7486 return;
7487 #else
7488 if (GET_CODE (op0) == MEM)
7489 op1 = force_reg (Pmode, op1);
7490 else
7491 op1 = legitimize_address (op1, op1, Pmode);
7492 #endif /* TARGET_MACHO */
7493 }
7494 else
7495 {
7496 if (GET_CODE (op0) == MEM
7497 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7498 || !push_operand (op0, mode))
7499 && GET_CODE (op1) == MEM)
7500 op1 = force_reg (mode, op1);
7501
7502 if (push_operand (op0, mode)
7503 && ! general_no_elim_operand (op1, mode))
7504 op1 = copy_to_mode_reg (mode, op1);
7505
7506 /* Force large constants in 64-bit compilation into a register
7507 to get them CSEed. */
7508 if (TARGET_64BIT && mode == DImode
7509 && immediate_operand (op1, mode)
7510 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7511 && !register_operand (op0, mode)
7512 && optimize && !reload_completed && !reload_in_progress)
7513 op1 = copy_to_mode_reg (mode, op1);
7514
7515 if (FLOAT_MODE_P (mode))
7516 {
7517 /* If we are loading a floating point constant to a register,
7518 force the value to memory now, since we'll get better code
7519 out of the back end. */
7520
7521 if (strict)
7522 ;
7523 else if (GET_CODE (op1) == CONST_DOUBLE)
7524 {
7525 op1 = validize_mem (force_const_mem (mode, op1));
7526 if (!register_operand (op0, mode))
7527 {
7528 rtx temp = gen_reg_rtx (mode);
7529 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7530 emit_move_insn (op0, temp);
7531 return;
7532 }
7533 }
7534 }
7535 }
7536
7537 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7538 }
7539
7540 void
7541 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7542 {
7543 /* Force constants other than zero into memory. We do not know how
7544 the instructions used to build constants modify the upper 64 bits
7545 of the register; once we have that information we may be able
7546 to handle some of them more efficiently. */
7547 if ((reload_in_progress | reload_completed) == 0
7548 && register_operand (operands[0], mode)
7549 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7550 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
7551
7552 /* Make operand1 a register if it isn't already. */
7553 if (!no_new_pseudos
7554 && !register_operand (operands[0], mode)
7555 && !register_operand (operands[1], mode))
7556 {
7557 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7558 emit_move_insn (operands[0], temp);
7559 return;
7560 }
7561
7562 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7563 }
7564
7565 /* Attempt to expand a binary operator. Make the expansion closer to the
7566 actual machine than just general_operand, which would allow 3 separate
7567 memory references (one output, two inputs) in a single insn. */
7568
7569 void
7570 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7571 rtx operands[])
7572 {
7573 int matching_memory;
7574 rtx src1, src2, dst, op, clob;
7575
7576 dst = operands[0];
7577 src1 = operands[1];
7578 src2 = operands[2];
7579
7580 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7581 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7582 && (rtx_equal_p (dst, src2)
7583 || immediate_operand (src1, mode)))
7584 {
7585 rtx temp = src1;
7586 src1 = src2;
7587 src2 = temp;
7588 }
7589
7590 /* If the destination is memory, and we do not have matching source
7591 operands, do things in registers. */
7592 matching_memory = 0;
7593 if (GET_CODE (dst) == MEM)
7594 {
7595 if (rtx_equal_p (dst, src1))
7596 matching_memory = 1;
7597 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7598 && rtx_equal_p (dst, src2))
7599 matching_memory = 2;
7600 else
7601 dst = gen_reg_rtx (mode);
7602 }
7603
7604 /* Both source operands cannot be in memory. */
7605 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7606 {
7607 if (matching_memory != 2)
7608 src2 = force_reg (mode, src2);
7609 else
7610 src1 = force_reg (mode, src1);
7611 }
7612
7613 /* If the operation is not commutative, source 1 cannot be a constant
7614 or non-matching memory. */
7615 if ((CONSTANT_P (src1)
7616 || (!matching_memory && GET_CODE (src1) == MEM))
7617 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7618 src1 = force_reg (mode, src1);
7619
7620 /* If optimizing, copy to regs to improve CSE */
7621 if (optimize && ! no_new_pseudos)
7622 {
7623 if (GET_CODE (dst) == MEM)
7624 dst = gen_reg_rtx (mode);
7625 if (GET_CODE (src1) == MEM)
7626 src1 = force_reg (mode, src1);
7627 if (GET_CODE (src2) == MEM)
7628 src2 = force_reg (mode, src2);
7629 }
7630
7631 /* Emit the instruction. */
7632
7633 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7634 if (reload_in_progress)
7635 {
7636 /* Reload doesn't know about the flags register, and doesn't know that
7637 it doesn't want to clobber it. We can only do this with PLUS. */
7638 if (code != PLUS)
7639 abort ();
7640 emit_insn (op);
7641 }
7642 else
7643 {
7644 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7645 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7646 }
7647
7648 /* Fix up the destination if needed. */
7649 if (dst != operands[0])
7650 emit_move_insn (operands[0], dst);
7651 }
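
/* For illustration (hypothetical operands, not from this file): an x86
   integer ALU insn can encode at most one memory operand and its
   destination must match one source, so for

     mem_dst = mem_dst + mem_src

   the expander above keeps the matching memory destination and forces
   mem_src into a register, yielding roughly

     movl mem_src, %reg
     addl %reg, mem_dst  */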
7652
7653 /* Return TRUE or FALSE depending on whether the binary operator meets the
7654 appropriate constraints. */
7655
7656 int
7657 ix86_binary_operator_ok (enum rtx_code code,
7658 enum machine_mode mode ATTRIBUTE_UNUSED,
7659 rtx operands[3])
7660 {
7661 /* Both source operands cannot be in memory. */
7662 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7663 return 0;
7664 /* If the operation is not commutative, source 1 cannot be a constant. */
7665 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7666 return 0;
7667 /* If the destination is memory, we must have a matching source operand. */
7668 if (GET_CODE (operands[0]) == MEM
7669 && ! (rtx_equal_p (operands[0], operands[1])
7670 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7671 && rtx_equal_p (operands[0], operands[2]))))
7672 return 0;
7673 /* If the operation is not commutative and source 1 is memory, we must
7674 have a matching destination. */
7675 if (GET_CODE (operands[1]) == MEM
7676 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7677 && ! rtx_equal_p (operands[0], operands[1]))
7678 return 0;
7679 return 1;
7680 }
7681
7682 /* Attempt to expand a unary operator. Make the expansion closer to the
7683 actual machine than just general_operand, which would allow 2 separate
7684 memory references (one output, one input) in a single insn. */
7685
7686 void
7687 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7688 rtx operands[])
7689 {
7690 int matching_memory;
7691 rtx src, dst, op, clob;
7692
7693 dst = operands[0];
7694 src = operands[1];
7695
7696 /* If the destination is memory, and we do not have matching source
7697 operands, do things in registers. */
7698 matching_memory = 0;
7699 if (GET_CODE (dst) == MEM)
7700 {
7701 if (rtx_equal_p (dst, src))
7702 matching_memory = 1;
7703 else
7704 dst = gen_reg_rtx (mode);
7705 }
7706
7707 /* When source operand is memory, destination must match. */
7708 if (!matching_memory && GET_CODE (src) == MEM)
7709 src = force_reg (mode, src);
7710
7711 /* If optimizing, copy to regs to improve CSE */
7712 if (optimize && ! no_new_pseudos)
7713 {
7714 if (GET_CODE (dst) == MEM)
7715 dst = gen_reg_rtx (mode);
7716 if (GET_CODE (src) == MEM)
7717 src = force_reg (mode, src);
7718 }
7719
7720 /* Emit the instruction. */
7721
7722 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7723 if (reload_in_progress || code == NOT)
7724 {
7725 /* Reload doesn't know about the flags register, and doesn't know that
7726 it doesn't want to clobber it. */
7727 if (code != NOT)
7728 abort ();
7729 emit_insn (op);
7730 }
7731 else
7732 {
7733 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7734 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7735 }
7736
7737 /* Fix up the destination if needed. */
7738 if (dst != operands[0])
7739 emit_move_insn (operands[0], dst);
7740 }
7741
7742 /* Return TRUE or FALSE depending on whether the unary operator meets the
7743 appropriate constraints. */
7744
7745 int
7746 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7747 enum machine_mode mode ATTRIBUTE_UNUSED,
7748 rtx operands[2] ATTRIBUTE_UNUSED)
7749 {
7750 /* If one of operands is memory, source and destination must match. */
7751 if ((GET_CODE (operands[0]) == MEM
7752 || GET_CODE (operands[1]) == MEM)
7753 && ! rtx_equal_p (operands[0], operands[1]))
7754 return FALSE;
7755 return TRUE;
7756 }
7757
7758 /* Return TRUE or FALSE depending on whether the first SET in INSN
7759 has source and destination with matching CC modes, and whether the
7760 CC mode is at least as constrained as REQ_MODE. */
7761
7762 int
7763 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7764 {
7765 rtx set;
7766 enum machine_mode set_mode;
7767
7768 set = PATTERN (insn);
7769 if (GET_CODE (set) == PARALLEL)
7770 set = XVECEXP (set, 0, 0);
7771 if (GET_CODE (set) != SET)
7772 abort ();
7773 if (GET_CODE (SET_SRC (set)) != COMPARE)
7774 abort ();
7775
7776 set_mode = GET_MODE (SET_DEST (set));
7777 switch (set_mode)
7778 {
7779 case CCNOmode:
7780 if (req_mode != CCNOmode
7781 && (req_mode != CCmode
7782 || XEXP (SET_SRC (set), 1) != const0_rtx))
7783 return 0;
7784 break;
7785 case CCmode:
7786 if (req_mode == CCGCmode)
7787 return 0;
7788 /* FALLTHRU */
7789 case CCGCmode:
7790 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7791 return 0;
7792 /* FALLTHRU */
7793 case CCGOCmode:
7794 if (req_mode == CCZmode)
7795 return 0;
7796 /* FALLTHRU */
7797 case CCZmode:
7798 break;
7799
7800 default:
7801 abort ();
7802 }
7803
7804 return (GET_MODE (SET_SRC (set)) == set_mode);
7805 }
7806
7807 /* Generate insn patterns to do an integer compare of OPERANDS. */
7808
7809 static rtx
7810 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7811 {
7812 enum machine_mode cmpmode;
7813 rtx tmp, flags;
7814
7815 cmpmode = SELECT_CC_MODE (code, op0, op1);
7816 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7817
7818 /* This is very simple, but making the interface the same as in the
7819 FP case makes the rest of the code easier. */
7820 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7821 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7822
7823 /* Return the test that should be put into the flags user, i.e.
7824 the bcc, scc, or cmov instruction. */
7825 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7826 }
7827
7828 /* Figure out whether to use ordered or unordered fp comparisons.
7829 Return the appropriate mode to use. */
7830
7831 enum machine_mode
7832 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
7833 {
7834 /* ??? In order to make all comparisons reversible, we do all comparisons
7835 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7836 between trapping and nontrapping comparisons in all forms, we can make
7837 inequality comparisons trapping again, since that results in better code
7838 when using FCOM based compares. */
7839 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7840 }
7841
7842 enum machine_mode
7843 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
7844 {
7845 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7846 return ix86_fp_compare_mode (code);
7847 switch (code)
7848 {
7849 /* Only zero flag is needed. */
7850 case EQ: /* ZF=0 */
7851 case NE: /* ZF!=0 */
7852 return CCZmode;
7853 /* Codes needing carry flag. */
7854 case GEU: /* CF=0 */
7855 case GTU: /* CF=0 & ZF=0 */
7856 case LTU: /* CF=1 */
7857 case LEU: /* CF=1 | ZF=1 */
7858 return CCmode;
7859 /* Codes possibly doable only with sign flag when
7860 comparing against zero. */
7861 case GE: /* SF=OF or SF=0 */
7862 case LT: /* SF<>OF or SF=1 */
7863 if (op1 == const0_rtx)
7864 return CCGOCmode;
7865 else
7866 /* For other cases Carry flag is not required. */
7867 return CCGCmode;
7868 /* Codes doable only with sign flag when comparing
7869 against zero, but we miss jump instruction for it
7870 so we need to use relational tests against overflow
7871 that thus needs to be zero. */
7872 case GT: /* ZF=0 & SF=OF */
7873 case LE: /* ZF=1 | SF<>OF */
7874 if (op1 == const0_rtx)
7875 return CCNOmode;
7876 else
7877 return CCGCmode;
7878 /* The strcmp pattern does a (use flags), and combine may ask us for
7879 the proper mode. */
7880 case USE:
7881 return CCmode;
7882 default:
7883 abort ();
7884 }
7885 }
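
/* A few worked examples of the mode selection above:
     a == b                -> CCZmode   (only ZF is inspected)
     (unsigned) a < b      -> CCmode    (CF is needed)
     (signed) a < 0        -> CCGOCmode (SF alone suffices against zero)
     (signed) a < b, b!=0  -> CCGCmode
     (signed) a > 0        -> CCNOmode  */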
7886
7887 /* Return the fixed registers used for condition codes. */
7888
7889 static bool
7890 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7891 {
7892 *p1 = FLAGS_REG;
7893 *p2 = FPSR_REG;
7894 return true;
7895 }
7896
7897 /* If two condition code modes are compatible, return a condition code
7898 mode which is compatible with both. Otherwise, return
7899 VOIDmode. */
7900
7901 static enum machine_mode
7902 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
7903 {
7904 if (m1 == m2)
7905 return m1;
7906
7907 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
7908 return VOIDmode;
7909
7910 if ((m1 == CCGCmode && m2 == CCGOCmode)
7911 || (m1 == CCGOCmode && m2 == CCGCmode))
7912 return CCGCmode;
7913
7914 switch (m1)
7915 {
7916 default:
7917 abort ();
7918
7919 case CCmode:
7920 case CCGCmode:
7921 case CCGOCmode:
7922 case CCNOmode:
7923 case CCZmode:
7924 switch (m2)
7925 {
7926 default:
7927 return VOIDmode;
7928
7929 case CCmode:
7930 case CCGCmode:
7931 case CCGOCmode:
7932 case CCNOmode:
7933 case CCZmode:
7934 return CCmode;
7935 }
7936
7937 case CCFPmode:
7938 case CCFPUmode:
7939 /* These are only compatible with themselves, which we already
7940 checked above. */
7941 return VOIDmode;
7942 }
7943 }
7944
7945 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7946
7947 int
7948 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
7949 {
7950 enum rtx_code swapped_code = swap_condition (code);
7951 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7952 || (ix86_fp_comparison_cost (swapped_code)
7953 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7954 }
7955
7956 /* Swap, force into registers, or otherwise massage the two operands
7957 to a fp comparison. The operands are updated in place; the new
7958 comparison code is returned. */
7959
7960 static enum rtx_code
7961 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
7962 {
7963 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7964 rtx op0 = *pop0, op1 = *pop1;
7965 enum machine_mode op_mode = GET_MODE (op0);
7966 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7967
7968 /* All of the unordered compare instructions only work on registers.
7969 The same is true of the fcomi compare instructions. The same is
7970 true of the XFmode compare instructions if not comparing with
7971 zero (ftst insn is used in this case). */
7972
7973 if (!is_sse
7974 && (fpcmp_mode == CCFPUmode
7975 || (op_mode == XFmode
7976 && ! (standard_80387_constant_p (op0) == 1
7977 || standard_80387_constant_p (op1) == 1))
7978 || ix86_use_fcomi_compare (code)))
7979 {
7980 op0 = force_reg (op_mode, op0);
7981 op1 = force_reg (op_mode, op1);
7982 }
7983 else
7984 {
7985 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7986 things around if they appear profitable, otherwise force op0
7987 into a register. */
7988
7989 if (standard_80387_constant_p (op0) == 0
7990 || (GET_CODE (op0) == MEM
7991 && ! (standard_80387_constant_p (op1) == 0
7992 || GET_CODE (op1) == MEM)))
7993 {
7994 rtx tmp;
7995 tmp = op0, op0 = op1, op1 = tmp;
7996 code = swap_condition (code);
7997 }
7998
7999 if (GET_CODE (op0) != REG)
8000 op0 = force_reg (op_mode, op0);
8001
8002 if (CONSTANT_P (op1))
8003 {
8004 int tmp = standard_80387_constant_p (op1);
8005 if (tmp == 0)
8006 op1 = validize_mem (force_const_mem (op_mode, op1));
8007 else if (tmp == 1)
8008 {
8009 if (TARGET_CMOVE)
8010 op1 = force_reg (op_mode, op1);
8011 }
8012 else
8013 op1 = force_reg (op_mode, op1);
8014 }
8015 }
8016
8017 /* Try to rearrange the comparison to make it cheaper. */
8018 if (ix86_fp_comparison_cost (code)
8019 > ix86_fp_comparison_cost (swap_condition (code))
8020 && (GET_CODE (op1) == REG || !no_new_pseudos))
8021 {
8022 rtx tmp;
8023 tmp = op0, op0 = op1, op1 = tmp;
8024 code = swap_condition (code);
8025 if (GET_CODE (op0) != REG)
8026 op0 = force_reg (op_mode, op0);
8027 }
8028
8029 *pop0 = op0;
8030 *pop1 = op1;
8031 return code;
8032 }
8033
8034 /* Convert the comparison codes we use to represent FP comparisons to the
8035 integer code that will result in a proper branch. Return UNKNOWN if no
8036 such code is available. */
8037
8038 enum rtx_code
8039 ix86_fp_compare_code_to_integer (enum rtx_code code)
8040 {
8041 switch (code)
8042 {
8043 case GT:
8044 return GTU;
8045 case GE:
8046 return GEU;
8047 case ORDERED:
8048 case UNORDERED:
8049 return code;
8050 break;
8051 case UNEQ:
8052 return EQ;
8053 break;
8054 case UNLT:
8055 return LTU;
8056 break;
8057 case UNLE:
8058 return LEU;
8059 break;
8060 case LTGT:
8061 return NE;
8062 break;
8063 default:
8064 return UNKNOWN;
8065 }
8066 }
8067
8068 /* Split comparison code CODE into comparisons we can do using branch
8069 instructions. BYPASS_CODE is the comparison code for a branch that will
8070 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8071 is not required, its value is set to UNKNOWN.
8072 We never require more than two branches. */
8073
8074 void
8075 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8076 enum rtx_code *first_code,
8077 enum rtx_code *second_code)
8078 {
8079 *first_code = code;
8080 *bypass_code = UNKNOWN;
8081 *second_code = UNKNOWN;
8082
8083 /* The fcomi comparison sets flags as follows:
8084
8085 cmp ZF PF CF
8086 > 0 0 0
8087 < 0 0 1
8088 = 1 0 0
8089 un 1 1 1 */
8090
8091 switch (code)
8092 {
8093 case GT: /* GTU - CF=0 & ZF=0 */
8094 case GE: /* GEU - CF=0 */
8095 case ORDERED: /* PF=0 */
8096 case UNORDERED: /* PF=1 */
8097 case UNEQ: /* EQ - ZF=1 */
8098 case UNLT: /* LTU - CF=1 */
8099 case UNLE: /* LEU - CF=1 | ZF=1 */
8100 case LTGT: /* EQ - ZF=0 */
8101 break;
8102 case LT: /* LTU - CF=1 - fails on unordered */
8103 *first_code = UNLT;
8104 *bypass_code = UNORDERED;
8105 break;
8106 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8107 *first_code = UNLE;
8108 *bypass_code = UNORDERED;
8109 break;
8110 case EQ: /* EQ - ZF=1 - fails on unordered */
8111 *first_code = UNEQ;
8112 *bypass_code = UNORDERED;
8113 break;
8114 case NE: /* NE - ZF=0 - fails on unordered */
8115 *first_code = LTGT;
8116 *second_code = UNORDERED;
8117 break;
8118 case UNGE: /* GEU - CF=0 - fails on unordered */
8119 *first_code = GE;
8120 *second_code = UNORDERED;
8121 break;
8122 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8123 *first_code = GT;
8124 *second_code = UNORDERED;
8125 break;
8126 default:
8127 abort ();
8128 }
8129 if (!TARGET_IEEE_FP)
8130 {
8131 *second_code = UNKNOWN;
8132 *bypass_code = UNKNOWN;
8133 }
8134 }
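
/* Example of the splitting above, for code == LT with TARGET_IEEE_FP:
   first_code becomes UNLT (CF=1) and bypass_code UNORDERED (PF=1), so
   a branch-on-LT comes out roughly as (illustrative asm, hypothetical
   labels):

     fucomip %st(1), %st
     jp      1f          ; unordered operands bypass the test
     jb      .Ltrue      ; CF set: strictly less than
   1:                                                                */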
8135
8136 /* Return the cost of a comparison done with fcom plus arithmetic on AX.
8137 All of the following functions use the number of instructions as their
8138 cost metric. In the future this should be tweaked to compute bytes for
8139 optimize_size and to reflect instruction performance on various CPUs. */
8140 static int
8141 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8142 {
8143 if (!TARGET_IEEE_FP)
8144 return 4;
8145 /* The cost of code output by ix86_expand_fp_compare. */
8146 switch (code)
8147 {
8148 case UNLE:
8149 case UNLT:
8150 case LTGT:
8151 case GT:
8152 case GE:
8153 case UNORDERED:
8154 case ORDERED:
8155 case UNEQ:
8156 return 4;
8157 break;
8158 case LT:
8159 case NE:
8160 case EQ:
8161 case UNGE:
8162 return 5;
8163 break;
8164 case LE:
8165 case UNGT:
8166 return 6;
8167 break;
8168 default:
8169 abort ();
8170 }
8171 }
8172
8173 /* Return cost of comparison done using fcomi operation.
8174 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8175 static int
8176 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8177 {
8178 enum rtx_code bypass_code, first_code, second_code;
8179 /* Return an arbitrarily high cost when the instruction is not supported -
8180 this prevents gcc from using it. */
8181 if (!TARGET_CMOVE)
8182 return 1024;
8183 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8184 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8185 }
8186
8187 /* Return cost of comparison done using sahf operation.
8188 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8189 static int
8190 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8191 {
8192 enum rtx_code bypass_code, first_code, second_code;
8193 /* Return an arbitrarily high cost when the instruction is not preferred -
8194 this prevents gcc from using it. */
8195 if (!TARGET_USE_SAHF && !optimize_size)
8196 return 1024;
8197 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8198 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8199 }
8200
8201 /* Compute cost of the comparison done using any method.
8202 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8203 static int
8204 ix86_fp_comparison_cost (enum rtx_code code)
8205 {
8206 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8207 int min;
8208
8209 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8210 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8211
8212 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8213 if (min > sahf_cost)
8214 min = sahf_cost;
8215 if (min > fcomi_cost)
8216 min = fcomi_cost;
8217 return min;
8218 }
8219
8220 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8221
8222 static rtx
8223 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8224 rtx *second_test, rtx *bypass_test)
8225 {
8226 enum machine_mode fpcmp_mode, intcmp_mode;
8227 rtx tmp, tmp2;
8228 int cost = ix86_fp_comparison_cost (code);
8229 enum rtx_code bypass_code, first_code, second_code;
8230
8231 fpcmp_mode = ix86_fp_compare_mode (code);
8232 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8233
8234 if (second_test)
8235 *second_test = NULL_RTX;
8236 if (bypass_test)
8237 *bypass_test = NULL_RTX;
8238
8239 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8240
8241 /* Do fcomi/sahf based test when profitable. */
8242 if ((bypass_code == UNKNOWN || bypass_test)
8243 && (second_code == UNKNOWN || second_test)
8244 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8245 {
8246 if (TARGET_CMOVE)
8247 {
8248 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8249 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8250 tmp);
8251 emit_insn (tmp);
8252 }
8253 else
8254 {
8255 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8256 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8257 if (!scratch)
8258 scratch = gen_reg_rtx (HImode);
8259 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8260 emit_insn (gen_x86_sahf_1 (scratch));
8261 }
8262
8263 /* The FP codes work out to act like unsigned. */
8264 intcmp_mode = fpcmp_mode;
8265 code = first_code;
8266 if (bypass_code != UNKNOWN)
8267 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8268 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8269 const0_rtx);
8270 if (second_code != UNKNOWN)
8271 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8272 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8273 const0_rtx);
8274 }
8275 else
8276 {
8277 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8278 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8279 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8280 if (!scratch)
8281 scratch = gen_reg_rtx (HImode);
8282 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8283
8284 /* In the unordered case, we have to check C2 for NaN's, which
8285 doesn't happen to work out to anything nice combination-wise.
8286 So do some bit twiddling on the value we've got in AH to come
8287 up with an appropriate set of condition codes. */
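
/* For reference, after fnstsw the condition bits land in %ah as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the 0x45 mask below covers all
   three.  A compare leaves >: C3=C2=C0=0, <: C0=1, =: C3=1, and an
   unordered result sets all of them; e.g. the GT case tests %ah against
   0x45 and uses EQ, which holds exactly for an ordered "greater".  */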
8288
8289 intcmp_mode = CCNOmode;
8290 switch (code)
8291 {
8292 case GT:
8293 case UNGT:
8294 if (code == GT || !TARGET_IEEE_FP)
8295 {
8296 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8297 code = EQ;
8298 }
8299 else
8300 {
8301 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8302 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8303 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8304 intcmp_mode = CCmode;
8305 code = GEU;
8306 }
8307 break;
8308 case LT:
8309 case UNLT:
8310 if (code == LT && TARGET_IEEE_FP)
8311 {
8312 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8313 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8314 intcmp_mode = CCmode;
8315 code = EQ;
8316 }
8317 else
8318 {
8319 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8320 code = NE;
8321 }
8322 break;
8323 case GE:
8324 case UNGE:
8325 if (code == GE || !TARGET_IEEE_FP)
8326 {
8327 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8328 code = EQ;
8329 }
8330 else
8331 {
8332 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8333 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8334 GEN_INT (0x01)));
8335 code = NE;
8336 }
8337 break;
8338 case LE:
8339 case UNLE:
8340 if (code == LE && TARGET_IEEE_FP)
8341 {
8342 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8343 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8344 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8345 intcmp_mode = CCmode;
8346 code = LTU;
8347 }
8348 else
8349 {
8350 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8351 code = NE;
8352 }
8353 break;
8354 case EQ:
8355 case UNEQ:
8356 if (code == EQ && TARGET_IEEE_FP)
8357 {
8358 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8359 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8360 intcmp_mode = CCmode;
8361 code = EQ;
8362 }
8363 else
8364 {
8365 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8366 code = NE;
8367 break;
8368 }
8369 break;
8370 case NE:
8371 case LTGT:
8372 if (code == NE && TARGET_IEEE_FP)
8373 {
8374 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8375 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8376 GEN_INT (0x40)));
8377 code = NE;
8378 }
8379 else
8380 {
8381 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8382 code = EQ;
8383 }
8384 break;
8385
8386 case UNORDERED:
8387 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8388 code = NE;
8389 break;
8390 case ORDERED:
8391 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8392 code = EQ;
8393 break;
8394
8395 default:
8396 abort ();
8397 }
8398 }
8399
8400 /* Return the test that should be put into the flags user, i.e.
8401 the bcc, scc, or cmov instruction. */
8402 return gen_rtx_fmt_ee (code, VOIDmode,
8403 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8404 const0_rtx);
8405 }
8406
8407 rtx
8408 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8409 {
8410 rtx op0, op1, ret;
8411 op0 = ix86_compare_op0;
8412 op1 = ix86_compare_op1;
8413
8414 if (second_test)
8415 *second_test = NULL_RTX;
8416 if (bypass_test)
8417 *bypass_test = NULL_RTX;
8418
8419 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8420 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8421 second_test, bypass_test);
8422 else
8423 ret = ix86_expand_int_compare (code, op0, op1);
8424
8425 return ret;
8426 }
8427
8428 /* Return true if the CODE will result in nontrivial jump sequence. */
8429 bool
8430 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8431 {
8432 enum rtx_code bypass_code, first_code, second_code;
8433 if (!TARGET_CMOVE)
8434 return true;
8435 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8436 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8437 }
8438
8439 void
8440 ix86_expand_branch (enum rtx_code code, rtx label)
8441 {
8442 rtx tmp;
8443
8444 switch (GET_MODE (ix86_compare_op0))
8445 {
8446 case QImode:
8447 case HImode:
8448 case SImode:
8449 simple:
8450 tmp = ix86_expand_compare (code, NULL, NULL);
8451 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8452 gen_rtx_LABEL_REF (VOIDmode, label),
8453 pc_rtx);
8454 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8455 return;
8456
8457 case SFmode:
8458 case DFmode:
8459 case XFmode:
8460 {
8461 rtvec vec;
8462 int use_fcomi;
8463 enum rtx_code bypass_code, first_code, second_code;
8464
8465 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8466 &ix86_compare_op1);
8467
8468 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8469
8470 /* Check whether we will use the natural sequence with one jump. If
8471 so, we can expand jump early. Otherwise delay expansion by
8472 creating compound insn to not confuse optimizers. */
8473 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8474 && TARGET_CMOVE)
8475 {
8476 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8477 gen_rtx_LABEL_REF (VOIDmode, label),
8478 pc_rtx, NULL_RTX);
8479 }
8480 else
8481 {
8482 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8483 ix86_compare_op0, ix86_compare_op1);
8484 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8485 gen_rtx_LABEL_REF (VOIDmode, label),
8486 pc_rtx);
8487 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8488
8489 use_fcomi = ix86_use_fcomi_compare (code);
8490 vec = rtvec_alloc (3 + !use_fcomi);
8491 RTVEC_ELT (vec, 0) = tmp;
8492 RTVEC_ELT (vec, 1)
8493 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8494 RTVEC_ELT (vec, 2)
8495 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8496 if (! use_fcomi)
8497 RTVEC_ELT (vec, 3)
8498 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8499
8500 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8501 }
8502 return;
8503 }
8504
8505 case DImode:
8506 if (TARGET_64BIT)
8507 goto simple;
8508 /* Expand DImode branch into multiple compare+branch. */
8509 {
8510 rtx lo[2], hi[2], label2;
8511 enum rtx_code code1, code2, code3;
8512
8513 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8514 {
8515 tmp = ix86_compare_op0;
8516 ix86_compare_op0 = ix86_compare_op1;
8517 ix86_compare_op1 = tmp;
8518 code = swap_condition (code);
8519 }
8520 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8521 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8522
8523 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8524 avoid two branches. This costs one extra insn, so disable when
8525 optimizing for size. */
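
/* Illustrative sequence for a 64-bit equality on a 32-bit target using
   this trick (hypothetical operands):

     xorl  b_hi, a_hi
     xorl  b_lo, a_lo
     orl   a_hi, a_lo    ; zero iff both halves were equal
     je    .Lequal

   i.e. one conditional branch instead of two.  */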
8526
8527 if ((code == EQ || code == NE)
8528 && (!optimize_size
8529 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8530 {
8531 rtx xor0, xor1;
8532
8533 xor1 = hi[0];
8534 if (hi[1] != const0_rtx)
8535 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8536 NULL_RTX, 0, OPTAB_WIDEN);
8537
8538 xor0 = lo[0];
8539 if (lo[1] != const0_rtx)
8540 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8541 NULL_RTX, 0, OPTAB_WIDEN);
8542
8543 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8544 NULL_RTX, 0, OPTAB_WIDEN);
8545
8546 ix86_compare_op0 = tmp;
8547 ix86_compare_op1 = const0_rtx;
8548 ix86_expand_branch (code, label);
8549 return;
8550 }
8551
8552 /* Otherwise, if we are doing a less-than or greater-than-or-equal
8553 comparison, op1 is a constant, and the low word is zero, then we can
8554 just examine the high word. */
8555
8556 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8557 switch (code)
8558 {
8559 case LT: case LTU: case GE: case GEU:
8560 ix86_compare_op0 = hi[0];
8561 ix86_compare_op1 = hi[1];
8562 ix86_expand_branch (code, label);
8563 return;
8564 default:
8565 break;
8566 }
8567
8568 /* Otherwise, we need two or three jumps. */
8569
8570 label2 = gen_label_rtx ();
8571
8572 code1 = code;
8573 code2 = swap_condition (code);
8574 code3 = unsigned_condition (code);
8575
8576 switch (code)
8577 {
8578 case LT: case GT: case LTU: case GTU:
8579 break;
8580
8581 case LE: code1 = LT; code2 = GT; break;
8582 case GE: code1 = GT; code2 = LT; break;
8583 case LEU: code1 = LTU; code2 = GTU; break;
8584 case GEU: code1 = GTU; code2 = LTU; break;
8585
8586 case EQ: code1 = UNKNOWN; code2 = NE; break;
8587 case NE: code2 = UNKNOWN; break;
8588
8589 default:
8590 abort ();
8591 }
8592
8593 /*
8594 * a < b =>
8595 * if (hi(a) < hi(b)) goto true;
8596 * if (hi(a) > hi(b)) goto false;
8597 * if (lo(a) < lo(b)) goto true;
8598 * false:
8599 */
8600
8601 ix86_compare_op0 = hi[0];
8602 ix86_compare_op1 = hi[1];
8603
8604 if (code1 != UNKNOWN)
8605 ix86_expand_branch (code1, label);
8606 if (code2 != UNKNOWN)
8607 ix86_expand_branch (code2, label2);
8608
8609 ix86_compare_op0 = lo[0];
8610 ix86_compare_op1 = lo[1];
8611 ix86_expand_branch (code3, label);
8612
8613 if (code2 != UNKNOWN)
8614 emit_label (label2);
8615 return;
8616 }
8617
8618 default:
8619 abort ();
8620 }
8621 }
8622
8623 /* Split branch based on floating point condition. */
8624 void
8625 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8626 rtx target1, rtx target2, rtx tmp)
8627 {
8628 rtx second, bypass;
8629 rtx label = NULL_RTX;
8630 rtx condition;
8631 int bypass_probability = -1, second_probability = -1, probability = -1;
8632 rtx i;
8633
8634 if (target2 != pc_rtx)
8635 {
8636 rtx tmp = target2;
8637 code = reverse_condition_maybe_unordered (code);
8638 target2 = target1;
8639 target1 = tmp;
8640 }
8641
8642 condition = ix86_expand_fp_compare (code, op1, op2,
8643 tmp, &second, &bypass);
8644
8645 if (split_branch_probability >= 0)
8646 {
8647 /* Distribute the probabilities across the jumps.
8648 Assume that BYPASS and SECOND always test
8649 for UNORDERED. */
8650 probability = split_branch_probability;
8651
8652 /* A value of 1 is low enough that there is no need for the probability
8653 to be updated. Later we may run some experiments and see
8654 whether unordered values are more frequent in practice. */
8655 if (bypass)
8656 bypass_probability = 1;
8657 if (second)
8658 second_probability = 1;
8659 }
8660 if (bypass != NULL_RTX)
8661 {
8662 label = gen_label_rtx ();
8663 i = emit_jump_insn (gen_rtx_SET
8664 (VOIDmode, pc_rtx,
8665 gen_rtx_IF_THEN_ELSE (VOIDmode,
8666 bypass,
8667 gen_rtx_LABEL_REF (VOIDmode,
8668 label),
8669 pc_rtx)));
8670 if (bypass_probability >= 0)
8671 REG_NOTES (i)
8672 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8673 GEN_INT (bypass_probability),
8674 REG_NOTES (i));
8675 }
8676 i = emit_jump_insn (gen_rtx_SET
8677 (VOIDmode, pc_rtx,
8678 gen_rtx_IF_THEN_ELSE (VOIDmode,
8679 condition, target1, target2)));
8680 if (probability >= 0)
8681 REG_NOTES (i)
8682 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8683 GEN_INT (probability),
8684 REG_NOTES (i));
8685 if (second != NULL_RTX)
8686 {
8687 i = emit_jump_insn (gen_rtx_SET
8688 (VOIDmode, pc_rtx,
8689 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8690 target2)));
8691 if (second_probability >= 0)
8692 REG_NOTES (i)
8693 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8694 GEN_INT (second_probability),
8695 REG_NOTES (i));
8696 }
8697 if (label != NULL_RTX)
8698 emit_label (label);
8699 }
8700
8701 int
8702 ix86_expand_setcc (enum rtx_code code, rtx dest)
8703 {
8704 rtx ret, tmp, tmpreg, equiv;
8705 rtx second_test, bypass_test;
8706
8707 if (GET_MODE (ix86_compare_op0) == DImode
8708 && !TARGET_64BIT)
8709 return 0; /* FAIL */
8710
8711 if (GET_MODE (dest) != QImode)
8712 abort ();
8713
8714 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8715 PUT_MODE (ret, QImode);
8716
8717 tmp = dest;
8718 tmpreg = dest;
8719
8720 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8721 if (bypass_test || second_test)
8722 {
8723 rtx test = second_test;
8724 int bypass = 0;
8725 rtx tmp2 = gen_reg_rtx (QImode);
8726 if (bypass_test)
8727 {
8728 if (second_test)
8729 abort ();
8730 test = bypass_test;
8731 bypass = 1;
8732 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8733 }
8734 PUT_MODE (test, QImode);
8735 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8736
8737 if (bypass)
8738 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8739 else
8740 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8741 }
8742
8743 /* Attach a REG_EQUAL note describing the comparison result. */
8744 equiv = simplify_gen_relational (code, QImode,
8745 GET_MODE (ix86_compare_op0),
8746 ix86_compare_op0, ix86_compare_op1);
8747 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8748
8749 return 1; /* DONE */
8750 }
8751
8752 /* Expand comparison setting or clearing carry flag. Return true when
8753 successful and set pop for the operation. */
8754 static bool
8755 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8756 {
8757 enum machine_mode mode =
8758 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8759
8760 /* Do not handle DImode compares that go through the special path. Also we
8761 can't deal with FP compares yet. It would be possible to add this. */
8762 if ((mode == DImode && !TARGET_64BIT))
8763 return false;
8764 if (FLOAT_MODE_P (mode))
8765 {
8766 rtx second_test = NULL, bypass_test = NULL;
8767 rtx compare_op, compare_seq;
8768
8769 /* Shortcut: the following common codes never translate into carry flag compares. */
8770 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8771 || code == ORDERED || code == UNORDERED)
8772 return false;
8773
8774 /* These comparisons require the zero flag; swap the operands so that they no longer do. */
8775 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8776 && !TARGET_IEEE_FP)
8777 {
8778 rtx tmp = op0;
8779 op0 = op1;
8780 op1 = tmp;
8781 code = swap_condition (code);
8782 }
8783
8784 /* Try to expand the comparison and verify that we end up with a carry flag
8785 based comparison. This fails to be true only when we decide to expand the
8786 comparison using arithmetic, which is not a common scenario. */
8787 start_sequence ();
8788 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8789 &second_test, &bypass_test);
8790 compare_seq = get_insns ();
8791 end_sequence ();
8792
8793 if (second_test || bypass_test)
8794 return false;
8795 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8796 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8797 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
8798 else
8799 code = GET_CODE (compare_op);
8800 if (code != LTU && code != GEU)
8801 return false;
8802 emit_insn (compare_seq);
8803 *pop = compare_op;
8804 return true;
8805 }
8806 if (!INTEGRAL_MODE_P (mode))
8807 return false;
8808 switch (code)
8809 {
8810 case LTU:
8811 case GEU:
8812 break;
8813
8814 /* Convert a==0 into (unsigned)a<1. */
8815 case EQ:
8816 case NE:
8817 if (op1 != const0_rtx)
8818 return false;
8819 op1 = const1_rtx;
8820 code = (code == EQ ? LTU : GEU);
8821 break;
8822
8823 /* Convert a>b into b<a or a>=b+1. */
8824 case GTU:
8825 case LEU:
8826 if (GET_CODE (op1) == CONST_INT)
8827 {
8828 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
8829 /* Bail out on overflow. We could still swap the operands, but that
8830 would force loading of the constant into a register. */
8831 if (op1 == const0_rtx
8832 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
8833 return false;
8834 code = (code == GTU ? GEU : LTU);
8835 }
8836 else
8837 {
8838 rtx tmp = op1;
8839 op1 = op0;
8840 op0 = tmp;
8841 code = (code == GTU ? LTU : GEU);
8842 }
8843 break;
8844
8845 /* Convert a>=0 into (unsigned)a<0x80000000. */
8846 case LT:
8847 case GE:
8848 if (mode == DImode || op1 != const0_rtx)
8849 return false;
8850 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8851 code = (code == LT ? GEU : LTU);
8852 break;
8853 case LE:
8854 case GT:
8855 if (mode == DImode || op1 != constm1_rtx)
8856 return false;
8857 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8858 code = (code == LE ? GEU : LTU);
8859 break;
8860
8861 default:
8862 return false;
8863 }
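/* Worked examples of the conversions above (all on 32-bit operands):
     a == 0            ->  (unsigned) a < 1            (LTU, CF set iff a == 0)
     (unsigned) a > 41 ->  (unsigned) a >= 42          (GEU, CF clear)
     (signed) a >= 0   ->  (unsigned) a < 0x80000000   (LTU)
   so in each case the result can be read straight out of the carry
   flag, e.g. with sbb.  */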
8864 /* Swapping the operands may cause a constant to appear as the first operand. */
8865 if (!nonimmediate_operand (op0, VOIDmode))
8866 {
8867 if (no_new_pseudos)
8868 return false;
8869 op0 = force_reg (mode, op0);
8870 }
8871 ix86_compare_op0 = op0;
8872 ix86_compare_op1 = op1;
8873 *pop = ix86_expand_compare (code, NULL, NULL);
8874 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
8875 abort ();
8876 return true;
8877 }
8878
8879 int
8880 ix86_expand_int_movcc (rtx operands[])
8881 {
8882 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8883 rtx compare_seq, compare_op;
8884 rtx second_test, bypass_test;
8885 enum machine_mode mode = GET_MODE (operands[0]);
8886 bool sign_bit_compare_p = false;
8887
8888 start_sequence ();
8889 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8890 compare_seq = get_insns ();
8891 end_sequence ();
8892
8893 compare_code = GET_CODE (compare_op);
8894
8895 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
8896 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
8897 sign_bit_compare_p = true;
8898
8899 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8900 HImode insns, we'd be swallowed in word prefix ops. */
8901
8902 if ((mode != HImode || TARGET_FAST_PREFIX)
8903 && (mode != DImode || TARGET_64BIT)
8904 && GET_CODE (operands[2]) == CONST_INT
8905 && GET_CODE (operands[3]) == CONST_INT)
8906 {
8907 rtx out = operands[0];
8908 HOST_WIDE_INT ct = INTVAL (operands[2]);
8909 HOST_WIDE_INT cf = INTVAL (operands[3]);
8910 HOST_WIDE_INT diff;
8911
8912 diff = ct - cf;
8913 /* Sign bit compares are better done using shifts than by using
8914 sbb. */
8915 if (sign_bit_compare_p
8916 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8917 ix86_compare_op1, &compare_op))
8918 {
8919 /* Detect overlap between destination and compare sources. */
8920 rtx tmp = out;
8921
8922 if (!sign_bit_compare_p)
8923 {
8924 bool fpcmp = false;
8925
8926 compare_code = GET_CODE (compare_op);
8927
8928 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8929 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8930 {
8931 fpcmp = true;
8932 compare_code = ix86_fp_compare_code_to_integer (compare_code);
8933 }
8934
8935 /* To simplify the rest of the code, restrict to the GEU case. */
8936 if (compare_code == LTU)
8937 {
8938 HOST_WIDE_INT tmp = ct;
8939 ct = cf;
8940 cf = tmp;
8941 compare_code = reverse_condition (compare_code);
8942 code = reverse_condition (code);
8943 }
8944 else
8945 {
8946 if (fpcmp)
8947 PUT_CODE (compare_op,
8948 reverse_condition_maybe_unordered
8949 (GET_CODE (compare_op)));
8950 else
8951 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
8952 }
8953 diff = ct - cf;
8954
8955 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8956 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8957 tmp = gen_reg_rtx (mode);
8958
8959 if (mode == DImode)
8960 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
8961 else
8962 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
8963 }
8964 else
8965 {
8966 if (code == GT || code == GE)
8967 code = reverse_condition (code);
8968 else
8969 {
8970 HOST_WIDE_INT tmp = ct;
8971 ct = cf;
8972 cf = tmp;
8973 diff = ct - cf;
8974 }
8975 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
8976 ix86_compare_op1, VOIDmode, 0, -1);
8977 }
8978
8979 if (diff == 1)
8980 {
8981 /*
8982 * cmpl op0,op1
8983 * sbbl dest,dest
8984 * [addl dest, ct]
8985 *
8986 * Size 5 - 8.
8987 */
8988 if (ct)
8989 tmp = expand_simple_binop (mode, PLUS,
8990 tmp, GEN_INT (ct),
8991 copy_rtx (tmp), 1, OPTAB_DIRECT);
8992 }
8993 else if (cf == -1)
8994 {
8995 /*
8996 * cmpl op0,op1
8997 * sbbl dest,dest
8998 * orl $ct, dest
8999 *
9000 * Size 8.
9001 */
9002 tmp = expand_simple_binop (mode, IOR,
9003 tmp, GEN_INT (ct),
9004 copy_rtx (tmp), 1, OPTAB_DIRECT);
9005 }
9006 else if (diff == -1 && ct)
9007 {
9008 /*
9009 * cmpl op0,op1
9010 * sbbl dest,dest
9011 * notl dest
9012 * [addl dest, cf]
9013 *
9014 * Size 8 - 11.
9015 */
9016 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9017 if (cf)
9018 tmp = expand_simple_binop (mode, PLUS,
9019 copy_rtx (tmp), GEN_INT (cf),
9020 copy_rtx (tmp), 1, OPTAB_DIRECT);
9021 }
9022 else
9023 {
9024 /*
9025 * cmpl op0,op1
9026 * sbbl dest,dest
9027 * [notl dest]
9028 * andl cf - ct, dest
9029 * [addl dest, ct]
9030 *
9031 * Size 8 - 11.
9032 */
9033
9034 if (cf == 0)
9035 {
9036 cf = ct;
9037 ct = 0;
9038 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9039 }
9040
9041 tmp = expand_simple_binop (mode, AND,
9042 copy_rtx (tmp),
9043 gen_int_mode (cf - ct, mode),
9044 copy_rtx (tmp), 1, OPTAB_DIRECT);
9045 if (ct)
9046 tmp = expand_simple_binop (mode, PLUS,
9047 copy_rtx (tmp), GEN_INT (ct),
9048 copy_rtx (tmp), 1, OPTAB_DIRECT);
9049 }
9050
9051 if (!rtx_equal_p (tmp, out))
9052 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9053
9054 return 1; /* DONE */
9055 }
9056
9057 if (diff < 0)
9058 {
9059 HOST_WIDE_INT tmp;
9060 tmp = ct, ct = cf, cf = tmp;
9061 diff = -diff;
9062 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9063 {
9064 /* We may be reversing an unordered compare to a normal compare, which
9065 is not valid in general (we may convert a non-trapping condition
9066 to a trapping one); however, on i386 we currently emit all
9067 comparisons unordered. */
9068 compare_code = reverse_condition_maybe_unordered (compare_code);
9069 code = reverse_condition_maybe_unordered (code);
9070 }
9071 else
9072 {
9073 compare_code = reverse_condition (compare_code);
9074 code = reverse_condition (code);
9075 }
9076 }
9077
9078 compare_code = UNKNOWN;
9079 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9080 && GET_CODE (ix86_compare_op1) == CONST_INT)
9081 {
9082 if (ix86_compare_op1 == const0_rtx
9083 && (code == LT || code == GE))
9084 compare_code = code;
9085 else if (ix86_compare_op1 == constm1_rtx)
9086 {
9087 if (code == LE)
9088 compare_code = LT;
9089 else if (code == GT)
9090 compare_code = GE;
9091 }
9092 }
9093
9094 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9095 if (compare_code != UNKNOWN
9096 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9097 && (cf == -1 || ct == -1))
9098 {
9099 /* If the lea code below could be used, only optimize
9100 if it results in a 2-insn sequence. */
9101
9102 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9103 || diff == 3 || diff == 5 || diff == 9)
9104 || (compare_code == LT && ct == -1)
9105 || (compare_code == GE && cf == -1))
9106 {
9107 /*
9108 * notl op1 (if necessary)
9109 * sarl $31, op1
9110 * orl cf, op1
9111 */
9112 if (ct != -1)
9113 {
9114 cf = ct;
9115 ct = -1;
9116 code = reverse_condition (code);
9117 }
9118
9119 out = emit_store_flag (out, code, ix86_compare_op0,
9120 ix86_compare_op1, VOIDmode, 0, -1);
9121
9122 out = expand_simple_binop (mode, IOR,
9123 out, GEN_INT (cf),
9124 out, 1, OPTAB_DIRECT);
9125 if (out != operands[0])
9126 emit_move_insn (operands[0], out);
9127
9128 return 1; /* DONE */
9129 }
9130 }
9131
9132
9133 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9134 || diff == 3 || diff == 5 || diff == 9)
9135 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9136 && (mode != DImode
9137 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9138 {
9139 /*
9140 * xorl dest,dest
9141 * cmpl op1,op2
9142 * setcc dest
9143 * lea cf(dest*(ct-cf)),dest
9144 *
9145 * Size 14.
9146 *
9147 * This also catches the degenerate setcc-only case.
9148 */
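/* Numeric example of the lea form above (hypothetical values): with
   ct = 5 and cf = 2, diff = 3, so setcc leaves dest as 0 or 1 and the
   MULT/PLUS chain built below folds into

     leal 2(%reg,%reg,2), %reg

   giving 2 for false and 5 for true without a branch.  */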
9149
9150 rtx tmp;
9151 int nops;
9152
9153 out = emit_store_flag (out, code, ix86_compare_op0,
9154 ix86_compare_op1, VOIDmode, 0, 1);
9155
9156 nops = 0;
9157 /* On x86_64 the lea instruction operates on Pmode, so we need
9158 to do the arithmetic in the proper mode to match. */
9159 if (diff == 1)
9160 tmp = copy_rtx (out);
9161 else
9162 {
9163 rtx out1;
9164 out1 = copy_rtx (out);
9165 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9166 nops++;
9167 if (diff & 1)
9168 {
9169 tmp = gen_rtx_PLUS (mode, tmp, out1);
9170 nops++;
9171 }
9172 }
9173 if (cf != 0)
9174 {
9175 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9176 nops++;
9177 }
9178 if (!rtx_equal_p (tmp, out))
9179 {
9180 if (nops == 1)
9181 out = force_operand (tmp, copy_rtx (out));
9182 else
9183 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9184 }
9185 if (!rtx_equal_p (out, operands[0]))
9186 emit_move_insn (operands[0], copy_rtx (out));
9187
9188 return 1; /* DONE */
9189 }
9190
9191 /*
9192 * General case: Jumpful:
9193 * xorl dest,dest cmpl op1, op2
9194 * cmpl op1, op2 movl ct, dest
9195 * setcc dest jcc 1f
9196 * decl dest movl cf, dest
9197 * andl (cf-ct),dest 1:
9198 * addl ct,dest
9199 *
9200 * Size 20. Size 14.
9201 *
9202 * This is reasonably steep, but branch mispredict costs are
9203 * high on modern cpus, so consider failing only if optimizing
9204 * for space.
9205 */
9206
9207 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9208 && BRANCH_COST >= 2)
9209 {
9210 if (cf == 0)
9211 {
9212 cf = ct;
9213 ct = 0;
9214 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9215 /* We may be reversing an unordered compare to a normal compare,
9216 which is not valid in general (we may convert a non-trapping
9217 condition to a trapping one); however, on i386 we currently
9218 emit all comparisons unordered. */
9219 code = reverse_condition_maybe_unordered (code);
9220 else
9221 {
9222 code = reverse_condition (code);
9223 if (compare_code != UNKNOWN)
9224 compare_code = reverse_condition (compare_code);
9225 }
9226 }
9227
9228 if (compare_code != UNKNOWN)
9229 {
9230 /* notl op1 (if needed)
9231 sarl $31, op1
9232 andl (cf-ct), op1
9233 addl ct, op1
9234
9235 For x < 0 (resp. x <= -1) there will be no notl,
9236 so if possible swap the constants to get rid of the
9237 complement.
9238 True/false will be -1/0 while code below (store flag
9239 followed by decrement) is 0/-1, so the constants need
9240 to be exchanged once more. */
9241
9242 if (compare_code == GE || !cf)
9243 {
9244 code = reverse_condition (code);
9245 compare_code = LT;
9246 }
9247 else
9248 {
9249 HOST_WIDE_INT tmp = cf;
9250 cf = ct;
9251 ct = tmp;
9252 }
9253
9254 out = emit_store_flag (out, code, ix86_compare_op0,
9255 ix86_compare_op1, VOIDmode, 0, -1);
9256 }
9257 else
9258 {
9259 out = emit_store_flag (out, code, ix86_compare_op0,
9260 ix86_compare_op1, VOIDmode, 0, 1);
9261
9262 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9263 copy_rtx (out), 1, OPTAB_DIRECT);
9264 }
9265
9266 out = expand_simple_binop (mode, AND, copy_rtx (out),
9267 gen_int_mode (cf - ct, mode),
9268 copy_rtx (out), 1, OPTAB_DIRECT);
9269 if (ct)
9270 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9271 copy_rtx (out), 1, OPTAB_DIRECT);
9272 if (!rtx_equal_p (out, operands[0]))
9273 emit_move_insn (operands[0], copy_rtx (out));
9274
9275 return 1; /* DONE */
9276 }
9277 }
9278
9279 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9280 {
9281 /* Try a few things more with specific constants and a variable. */
9282
9283 optab op;
9284 rtx var, orig_out, out, tmp;
9285
9286 if (BRANCH_COST <= 2)
9287 return 0; /* FAIL */
9288
9289 /* If one of the two operands is an interesting constant, load a
9290 constant with the above and mask it in with a logical operation. */
9291
9292 if (GET_CODE (operands[2]) == CONST_INT)
9293 {
9294 var = operands[3];
9295 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9296 operands[3] = constm1_rtx, op = and_optab;
9297 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9298 operands[3] = const0_rtx, op = ior_optab;
9299 else
9300 return 0; /* FAIL */
9301 }
9302 else if (GET_CODE (operands[3]) == CONST_INT)
9303 {
9304 var = operands[2];
9305 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9306 operands[2] = constm1_rtx, op = and_optab;
9307 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9308 operands[2] = const0_rtx, op = ior_optab;
9309 else
9310 return 0; /* FAIL */
9311 }
9312 else
9313 return 0; /* FAIL */
9314
9315 orig_out = operands[0];
9316 tmp = gen_reg_rtx (mode);
9317 operands[0] = tmp;
9318
9319 /* Recurse to get the constant loaded. */
9320 if (ix86_expand_int_movcc (operands) == 0)
9321 return 0; /* FAIL */
9322
9323 /* Mask in the interesting variable. */
9324 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9325 OPTAB_WIDEN);
9326 if (!rtx_equal_p (out, orig_out))
9327 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9328
9329 return 1; /* DONE */
9330 }
9331
9332 /*
9333 * For comparison with above,
9334 *
9335 * movl cf,dest
9336 * movl ct,tmp
9337 * cmpl op1,op2
9338 * cmovcc tmp,dest
9339 *
9340 * Size 15.
9341 */
9342
9343 if (! nonimmediate_operand (operands[2], mode))
9344 operands[2] = force_reg (mode, operands[2]);
9345 if (! nonimmediate_operand (operands[3], mode))
9346 operands[3] = force_reg (mode, operands[3]);
9347
9348 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9349 {
9350 rtx tmp = gen_reg_rtx (mode);
9351 emit_move_insn (tmp, operands[3]);
9352 operands[3] = tmp;
9353 }
9354 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9355 {
9356 rtx tmp = gen_reg_rtx (mode);
9357 emit_move_insn (tmp, operands[2]);
9358 operands[2] = tmp;
9359 }
9360
9361 if (! register_operand (operands[2], VOIDmode)
9362 && (mode == QImode
9363 || ! register_operand (operands[3], VOIDmode)))
9364 operands[2] = force_reg (mode, operands[2]);
9365
9366 if (mode == QImode
9367 && ! register_operand (operands[3], VOIDmode))
9368 operands[3] = force_reg (mode, operands[3]);
9369
9370 emit_insn (compare_seq);
9371 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9372 gen_rtx_IF_THEN_ELSE (mode,
9373 compare_op, operands[2],
9374 operands[3])));
9375 if (bypass_test)
9376 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9377 gen_rtx_IF_THEN_ELSE (mode,
9378 bypass_test,
9379 copy_rtx (operands[3]),
9380 copy_rtx (operands[0]))));
9381 if (second_test)
9382 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9383 gen_rtx_IF_THEN_ELSE (mode,
9384 second_test,
9385 copy_rtx (operands[2]),
9386 copy_rtx (operands[0]))));
9387
9388 return 1; /* DONE */
9389 }
9390
9391 int
9392 ix86_expand_fp_movcc (rtx operands[])
9393 {
9394 enum rtx_code code;
9395 rtx tmp;
9396 rtx compare_op, second_test, bypass_test;
9397
9398 /* For SF/DFmode conditional moves based on comparisons
9399 in the same mode, we may want to use SSE min/max instructions. */
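/* An illustrative example (hypothetical source) of the transformation
   handled here: with SSE math enabled, the SFmode conditional move
       x = (a < b) ? a : b;
   becomes a single min instruction (see the LT/UNLE and GT/UNGE checks
   below) instead of a compare followed by a conditional move.  */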
9400 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9401 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9402 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9403 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9404 && (!TARGET_IEEE_FP
9405 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9406 /* We may be called from the post-reload splitter. */
9407 && (!REG_P (operands[0])
9408 || SSE_REG_P (operands[0])
9409 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9410 {
9411 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9412 code = GET_CODE (operands[1]);
9413
9414 /* See if we have a (cross) match between the comparison operands and
9415 the conditional move operands. */
9416 if (rtx_equal_p (operands[2], op1))
9417 {
9418 rtx tmp = op0;
9419 op0 = op1;
9420 op1 = tmp;
9421 code = reverse_condition_maybe_unordered (code);
9422 }
9423 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9424 {
9425 /* Check for min operation. */
9426 if (code == LT || code == UNLE)
9427 {
9428 if (code == UNLE)
9429 {
9430 rtx tmp = op0;
9431 op0 = op1;
9432 op1 = tmp;
9433 }
9434 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9435 if (memory_operand (op0, VOIDmode))
9436 op0 = force_reg (GET_MODE (operands[0]), op0);
9437 if (GET_MODE (operands[0]) == SFmode)
9438 emit_insn (gen_minsf3 (operands[0], op0, op1));
9439 else
9440 emit_insn (gen_mindf3 (operands[0], op0, op1));
9441 return 1;
9442 }
9443 /* Check for max operation. */
9444 if (code == GT || code == UNGE)
9445 {
9446 if (code == UNGE)
9447 {
9448 rtx tmp = op0;
9449 op0 = op1;
9450 op1 = tmp;
9451 }
9452 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9453 if (memory_operand (op0, VOIDmode))
9454 op0 = force_reg (GET_MODE (operands[0]), op0);
9455 if (GET_MODE (operands[0]) == SFmode)
9456 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9457 else
9458 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9459 return 1;
9460 }
9461 }
9462 /* Arrange for the condition to be a valid sse_comparison_operator. In
9463 non-IEEE mode, also try to canonicalize the destination operand
9464 to be first in the comparison - this helps reload avoid extra
9465 moves. */
9466 if (!sse_comparison_operator (operands[1], VOIDmode)
9467 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9468 {
9469 rtx tmp = ix86_compare_op0;
9470 ix86_compare_op0 = ix86_compare_op1;
9471 ix86_compare_op1 = tmp;
9472 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9473 VOIDmode, ix86_compare_op0,
9474 ix86_compare_op1);
9475 }
9476 /* Similarly, try to arrange for the result to be the first operand of the
9477 conditional move. We also don't support the NE comparison on SSE, so try
9478 to avoid it. */
9479 if ((rtx_equal_p (operands[0], operands[3])
9480 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9481 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9482 {
9483 rtx tmp = operands[2];
9484 operands[2] = operands[3];
9485 operands[3] = tmp;
9486 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9487 (GET_CODE (operands[1])),
9488 VOIDmode, ix86_compare_op0,
9489 ix86_compare_op1);
9490 }
9491 if (GET_MODE (operands[0]) == SFmode)
9492 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9493 operands[2], operands[3],
9494 ix86_compare_op0, ix86_compare_op1));
9495 else
9496 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9497 operands[2], operands[3],
9498 ix86_compare_op0, ix86_compare_op1));
9499 return 1;
9500 }
9501
9502 /* The floating point conditional move instructions don't directly
9503 support conditions resulting from a signed integer comparison. */
9504
9505 code = GET_CODE (operands[1]);
9506 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9507
9511 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9512 {
9513 if (second_test != NULL || bypass_test != NULL)
9514 abort ();
9515 tmp = gen_reg_rtx (QImode);
9516 ix86_expand_setcc (code, tmp);
9517 code = NE;
9518 ix86_compare_op0 = tmp;
9519 ix86_compare_op1 = const0_rtx;
9520 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9521 }
9522 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9523 {
9524 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9525 emit_move_insn (tmp, operands[3]);
9526 operands[3] = tmp;
9527 }
9528 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9529 {
9530 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9531 emit_move_insn (tmp, operands[2]);
9532 operands[2] = tmp;
9533 }
9534
9535 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9536 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9537 compare_op,
9538 operands[2],
9539 operands[3])));
9540 if (bypass_test)
9541 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9542 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9543 bypass_test,
9544 operands[3],
9545 operands[0])));
9546 if (second_test)
9547 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9548 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9549 second_test,
9550 operands[2],
9551 operands[0])));
9552
9553 return 1;
9554 }
9555
9556 /* Expand conditional increment or decrement using adc/sbb instructions.
9557 The default case using setcc followed by the conditional move can be
9558 done by generic code. */
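/* An illustrative sketch (hypothetical operands, AT&T syntax) of what this
   expands to: for an unsigned comparison,
       x = x + (a < b);
   can be emitted without setcc or cmov as
       cmpl  b, a         sets the carry flag iff a < b (unsigned)
       adcl  $0, x        x = x + CF
   and the decrement x = x - (a < b) correspondingly uses sbb.  */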
9559 int
9560 ix86_expand_int_addcc (rtx operands[])
9561 {
9562 enum rtx_code code = GET_CODE (operands[1]);
9563 rtx compare_op;
9564 rtx val = const0_rtx;
9565 bool fpcmp = false;
9566 enum machine_mode mode = GET_MODE (operands[0]);
9567
9568 if (operands[3] != const1_rtx
9569 && operands[3] != constm1_rtx)
9570 return 0;
9571 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9572 ix86_compare_op1, &compare_op))
9573 return 0;
9574 code = GET_CODE (compare_op);
9575
9576 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9577 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9578 {
9579 fpcmp = true;
9580 code = ix86_fp_compare_code_to_integer (code);
9581 }
9582
9583 if (code != LTU)
9584 {
9585 val = constm1_rtx;
9586 if (fpcmp)
9587 PUT_CODE (compare_op,
9588 reverse_condition_maybe_unordered
9589 (GET_CODE (compare_op)));
9590 else
9591 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9592 }
9593 PUT_MODE (compare_op, mode);
9594
9595 /* Construct either adc or sbb insn. */
9596 if ((code == LTU) == (operands[3] == constm1_rtx))
9597 {
9598 switch (GET_MODE (operands[0]))
9599 {
9600 case QImode:
9601 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9602 break;
9603 case HImode:
9604 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9605 break;
9606 case SImode:
9607 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9608 break;
9609 case DImode:
9610 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9611 break;
9612 default:
9613 abort ();
9614 }
9615 }
9616 else
9617 {
9618 switch (GET_MODE (operands[0]))
9619 {
9620 case QImode:
9621 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9622 break;
9623 case HImode:
9624 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9625 break;
9626 case SImode:
9627 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9628 break;
9629 case DImode:
9630 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9631 break;
9632 default:
9633 abort ();
9634 }
9635 }
9636 return 1; /* DONE */
9637 }
9638
9639
9640 /* Split OPERAND into SImode parts stored in PARTS. Similar to split_di,
9641 but works for floating point parameters and non-offsettable memories.
9642 For pushes, it returns just stack offsets; the values will be saved
9643 in the right order. At most three parts are generated. */
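/* A worked example, assuming the usual i386 little-endian layout: on a
   32-bit target the DFmode constant 1.0 (bit image 0x3FF0000000000000)
   splits into parts[0] = 0x00000000 and parts[1] = 0x3FF00000.  */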
9644
9645 static int
9646 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9647 {
9648 int size;
9649
9650 if (!TARGET_64BIT)
9651 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9652 else
9653 size = (GET_MODE_SIZE (mode) + 4) / 8;
9654
9655 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9656 abort ();
9657 if (size < 2 || size > 3)
9658 abort ();
9659
9660 /* Optimize constant pool references into immediates. This is used by fp
9661 moves, which force all constants to memory to allow combining. */
9662 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9663 {
9664 rtx tmp = maybe_get_pool_constant (operand);
9665 if (tmp)
9666 operand = tmp;
9667 }
9668
9669 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9670 {
9671 /* The only non-offsettable memories we handle are pushes. */
9672 if (! push_operand (operand, VOIDmode))
9673 abort ();
9674
9675 operand = copy_rtx (operand);
9676 PUT_MODE (operand, Pmode);
9677 parts[0] = parts[1] = parts[2] = operand;
9678 }
9679 else if (!TARGET_64BIT)
9680 {
9681 if (mode == DImode)
9682 split_di (&operand, 1, &parts[0], &parts[1]);
9683 else
9684 {
9685 if (REG_P (operand))
9686 {
9687 if (!reload_completed)
9688 abort ();
9689 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9690 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9691 if (size == 3)
9692 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9693 }
9694 else if (offsettable_memref_p (operand))
9695 {
9696 operand = adjust_address (operand, SImode, 0);
9697 parts[0] = operand;
9698 parts[1] = adjust_address (operand, SImode, 4);
9699 if (size == 3)
9700 parts[2] = adjust_address (operand, SImode, 8);
9701 }
9702 else if (GET_CODE (operand) == CONST_DOUBLE)
9703 {
9704 REAL_VALUE_TYPE r;
9705 long l[4];
9706
9707 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9708 switch (mode)
9709 {
9710 case XFmode:
9711 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9712 parts[2] = gen_int_mode (l[2], SImode);
9713 break;
9714 case DFmode:
9715 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9716 break;
9717 default:
9718 abort ();
9719 }
9720 parts[1] = gen_int_mode (l[1], SImode);
9721 parts[0] = gen_int_mode (l[0], SImode);
9722 }
9723 else
9724 abort ();
9725 }
9726 }
9727 else
9728 {
9729 if (mode == TImode)
9730 split_ti (&operand, 1, &parts[0], &parts[1]);
9731 if (mode == XFmode || mode == TFmode)
9732 {
9733 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
9734 if (REG_P (operand))
9735 {
9736 if (!reload_completed)
9737 abort ();
9738 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9739 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9740 }
9741 else if (offsettable_memref_p (operand))
9742 {
9743 operand = adjust_address (operand, DImode, 0);
9744 parts[0] = operand;
9745 parts[1] = adjust_address (operand, upper_mode, 8);
9746 }
9747 else if (GET_CODE (operand) == CONST_DOUBLE)
9748 {
9749 REAL_VALUE_TYPE r;
9750 long l[3];
9751
9752 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9753 real_to_target (l, &r, mode);
9754 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9755 if (HOST_BITS_PER_WIDE_INT >= 64)
9756 parts[0]
9757 = gen_int_mode
9758 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9759 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9760 DImode);
9761 else
9762 parts[0] = immed_double_const (l[0], l[1], DImode);
9763 if (upper_mode == SImode)
9764 parts[1] = gen_int_mode (l[2], SImode);
9765 else if (HOST_BITS_PER_WIDE_INT >= 64)
9766 parts[1]
9767 = gen_int_mode
9768 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9769 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9770 DImode);
9771 else
9772 parts[1] = immed_double_const (l[2], l[3], DImode);
9773 }
9774 else
9775 abort ();
9776 }
9777 }
9778
9779 return size;
9780 }
9781
9782 /* Emit insns to perform a move or push of DI, DF, and XF values.
9783 Operands 2-4 are filled with the destination parts in the correct
9784 order; operands 5-7 with the corresponding source parts. All the
9785 required insns are emitted by this function itself. */
9786
9787 void
9788 ix86_split_long_move (rtx operands[])
9789 {
9790 rtx part[2][3];
9791 int nparts;
9792 int push = 0;
9793 int collisions = 0;
9794 enum machine_mode mode = GET_MODE (operands[0]);
9795
9796 /* The DFmode expanders may ask us to move a double.
9797 For a 64-bit target this is a single move. By hiding the fact
9798 here we simplify the i386.md splitters. */
9799 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9800 {
9801 /* Optimize constant pool references into immediates. This is used by
9802 fp moves, which force all constants to memory to allow combining. */
9803
9804 if (GET_CODE (operands[1]) == MEM
9805 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9806 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9807 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9808 if (push_operand (operands[0], VOIDmode))
9809 {
9810 operands[0] = copy_rtx (operands[0]);
9811 PUT_MODE (operands[0], Pmode);
9812 }
9813 else
9814 operands[0] = gen_lowpart (DImode, operands[0]);
9815 operands[1] = gen_lowpart (DImode, operands[1]);
9816 emit_move_insn (operands[0], operands[1]);
9817 return;
9818 }
9819
9820 /* The only non-offsettable memory we handle is a push. */
9821 if (push_operand (operands[0], VOIDmode))
9822 push = 1;
9823 else if (GET_CODE (operands[0]) == MEM
9824 && ! offsettable_memref_p (operands[0]))
9825 abort ();
9826
9827 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9828 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9829
9830 /* When emitting a push, take care with source operands on the stack. */
9831 if (push && GET_CODE (operands[1]) == MEM
9832 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9833 {
9834 if (nparts == 3)
9835 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9836 XEXP (part[1][2], 0));
9837 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9838 XEXP (part[1][1], 0));
9839 }
9840
9841 /* We need to do the copy in the right order in case an address register
9842 of the source overlaps the destination. */
9843 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9844 {
9845 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9846 collisions++;
9847 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9848 collisions++;
9849 if (nparts == 3
9850 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9851 collisions++;
9852
9853 /* Collision in the middle part can be handled by reordering. */
9854 if (collisions == 1 && nparts == 3
9855 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9856 {
9857 rtx tmp;
9858 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9859 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9860 }
9861
9862 /* If there are more collisions, we can't handle it by reordering.
9863 Do an lea to the last part and use only one colliding move. */
9864 else if (collisions > 1)
9865 {
9866 rtx base;
9867
9868 collisions = 1;
9869
9870 base = part[0][nparts - 1];
9871
9872 /* Handle the case when the last part isn't valid for lea.
9873 Happens in 64-bit mode storing the 12-byte XFmode. */
9874 if (GET_MODE (base) != Pmode)
9875 base = gen_rtx_REG (Pmode, REGNO (base));
9876
9877 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9878 part[1][0] = replace_equiv_address (part[1][0], base);
9879 part[1][1] = replace_equiv_address (part[1][1],
9880 plus_constant (base, UNITS_PER_WORD));
9881 if (nparts == 3)
9882 part[1][2] = replace_equiv_address (part[1][2],
9883 plus_constant (base, 8));
9884 }
9885 }
9886
9887 if (push)
9888 {
9889 if (!TARGET_64BIT)
9890 {
9891 if (nparts == 3)
9892 {
9893 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9894 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
9895 emit_move_insn (part[0][2], part[1][2]);
9896 }
9897 }
9898 else
9899 {
9900 /* In 64-bit mode we don't have a 32-bit push available. If this is a
9901 register, that is OK - we will just use the larger counterpart. We also
9902 retype the memory - this comes from an attempt to avoid a REX prefix
9903 when moving the second half of a TFmode value. */
9904 if (GET_MODE (part[1][1]) == SImode)
9905 {
9906 if (GET_CODE (part[1][1]) == MEM)
9907 part[1][1] = adjust_address (part[1][1], DImode, 0);
9908 else if (REG_P (part[1][1]))
9909 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9910 else
9911 abort ();
9912 if (GET_MODE (part[1][0]) == SImode)
9913 part[1][0] = part[1][1];
9914 }
9915 }
9916 emit_move_insn (part[0][1], part[1][1]);
9917 emit_move_insn (part[0][0], part[1][0]);
9918 return;
9919 }
9920
9921 /* Choose the correct order so that we do not overwrite the source before it is copied. */
9922 if ((REG_P (part[0][0])
9923 && REG_P (part[1][1])
9924 && (REGNO (part[0][0]) == REGNO (part[1][1])
9925 || (nparts == 3
9926 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9927 || (collisions > 0
9928 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9929 {
9930 if (nparts == 3)
9931 {
9932 operands[2] = part[0][2];
9933 operands[3] = part[0][1];
9934 operands[4] = part[0][0];
9935 operands[5] = part[1][2];
9936 operands[6] = part[1][1];
9937 operands[7] = part[1][0];
9938 }
9939 else
9940 {
9941 operands[2] = part[0][1];
9942 operands[3] = part[0][0];
9943 operands[5] = part[1][1];
9944 operands[6] = part[1][0];
9945 }
9946 }
9947 else
9948 {
9949 if (nparts == 3)
9950 {
9951 operands[2] = part[0][0];
9952 operands[3] = part[0][1];
9953 operands[4] = part[0][2];
9954 operands[5] = part[1][0];
9955 operands[6] = part[1][1];
9956 operands[7] = part[1][2];
9957 }
9958 else
9959 {
9960 operands[2] = part[0][0];
9961 operands[3] = part[0][1];
9962 operands[5] = part[1][0];
9963 operands[6] = part[1][1];
9964 }
9965 }
9966
9967 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
9968 if (optimize_size)
9969 {
9970 if (GET_CODE (operands[5]) == CONST_INT
9971 && operands[5] != const0_rtx
9972 && REG_P (operands[2]))
9973 {
9974 if (GET_CODE (operands[6]) == CONST_INT
9975 && INTVAL (operands[6]) == INTVAL (operands[5]))
9976 operands[6] = operands[2];
9977
9978 if (nparts == 3
9979 && GET_CODE (operands[7]) == CONST_INT
9980 && INTVAL (operands[7]) == INTVAL (operands[5]))
9981 operands[7] = operands[2];
9982 }
9983
9984 if (nparts == 3
9985 && GET_CODE (operands[6]) == CONST_INT
9986 && operands[6] != const0_rtx
9987 && REG_P (operands[3])
9988 && GET_CODE (operands[7]) == CONST_INT
9989 && INTVAL (operands[7]) == INTVAL (operands[6]))
9990 operands[7] = operands[3];
9991 }
9992
9993 emit_move_insn (operands[2], operands[5]);
9994 emit_move_insn (operands[3], operands[6]);
9995 if (nparts == 3)
9996 emit_move_insn (operands[4], operands[7]);
9997
9998 return;
9999 }
10000
10001 /* Helper function of ix86_split_ashldi used to generate an SImode
10002 left shift by a constant, either using a single shift or
10003 a sequence of add instructions. */
10004
10005 static void
10006 ix86_expand_ashlsi3_const (rtx operand, int count)
10007 {
10008 if (count == 1)
10009 emit_insn (gen_addsi3 (operand, operand, operand));
10010 else if (!optimize_size
10011 && count * ix86_cost->add <= ix86_cost->shift_const)
10012 {
10013 int i;
10014 for (i=0; i<count; i++)
10015 emit_insn (gen_addsi3 (operand, operand, operand));
10016 }
10017 else
10018 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10019 }
10020
10021 void
10022 ix86_split_ashldi (rtx *operands, rtx scratch)
10023 {
10024 rtx low[2], high[2];
10025 int count;
10026
10027 if (GET_CODE (operands[2]) == CONST_INT)
10028 {
10029 split_di (operands, 2, low, high);
10030 count = INTVAL (operands[2]) & 63;
10031
10032 if (count >= 32)
10033 {
10034 emit_move_insn (high[0], low[1]);
10035 emit_move_insn (low[0], const0_rtx);
10036
10037 if (count > 32)
10038 ix86_expand_ashlsi3_const (high[0], count - 32);
10039 }
10040 else
10041 {
10042 if (!rtx_equal_p (operands[0], operands[1]))
10043 emit_move_insn (operands[0], operands[1]);
10044 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10045 ix86_expand_ashlsi3_const (low[0], count);
10046 }
10047 return;
10048 }
10049
10050 split_di (operands, 1, low, high);
10051
10052 if (operands[1] == const1_rtx)
10053 {
10054 /* Assuming we've chosen QImode-capable registers, 1LL << N
10055 can be done with two 32-bit shifts, no branches, no cmoves. */
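/* A sketch of the machine-level trick (not valid ISO C for n >= 32; it
   relies on the i386 behavior that 32-bit shift counts are masked to
   5 bits):
       lo = (n & 32) == 0;    hi = (n & 32) != 0;    two setcc's on bit 5
       lo <<= n;              hi <<= n;              counts act modulo 32
   which yields the 64-bit value 1LL << n for any n in 0..63.  */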
10056 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10057 {
10058 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10059
10060 ix86_expand_clear (low[0]);
10061 ix86_expand_clear (high[0]);
10062 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10063
10064 d = gen_lowpart (QImode, low[0]);
10065 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10066 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10067 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10068
10069 d = gen_lowpart (QImode, high[0]);
10070 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10071 s = gen_rtx_NE (QImode, flags, const0_rtx);
10072 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10073 }
10074
10075 /* Otherwise, we can get the same results by manually performing
10076 a bit extract operation on bit 5, and then performing the two
10077 shifts. The two methods of getting 0/1 into low/high are exactly
10078 the same size. Avoiding the shift in the bit extract case helps
10079 pentium4 a bit; no one else seems to care much either way. */
10080 else
10081 {
10082 rtx x;
10083
10084 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10085 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10086 else
10087 x = gen_lowpart (SImode, operands[2]);
10088 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10089
10090 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10091 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10092 emit_move_insn (low[0], high[0]);
10093 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10094 }
10095
10096 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10097 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10098 return;
10099 }
10100
10101 if (operands[1] == constm1_rtx)
10102 {
10103 /* For -1LL << N, we can avoid the shld instruction, because we
10104 know that we're shifting 0...31 ones into a -1. */
10105 emit_move_insn (low[0], constm1_rtx);
10106 if (optimize_size)
10107 emit_move_insn (high[0], low[0]);
10108 else
10109 emit_move_insn (high[0], constm1_rtx);
10110 }
10111 else
10112 {
10113 if (!rtx_equal_p (operands[0], operands[1]))
10114 emit_move_insn (operands[0], operands[1]);
10115
10116 split_di (operands, 1, low, high);
10117 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10118 }
10119
10120 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10121
10122 if (TARGET_CMOVE && scratch)
10123 {
10124 ix86_expand_clear (scratch);
10125 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10126 }
10127 else
10128 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10129 }
10130
10131 void
10132 ix86_split_ashrdi (rtx *operands, rtx scratch)
10133 {
10134 rtx low[2], high[2];
10135 int count;
10136
10137 if (GET_CODE (operands[2]) == CONST_INT)
10138 {
10139 split_di (operands, 2, low, high);
10140 count = INTVAL (operands[2]) & 63;
10141
10142 if (count == 63)
10143 {
10144 emit_move_insn (high[0], high[1]);
10145 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10146 emit_move_insn (low[0], high[0]);
10147
10148 }
10149 else if (count >= 32)
10150 {
10151 emit_move_insn (low[0], high[1]);
10152 emit_move_insn (high[0], low[0]);
10153 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10154 if (count > 32)
10155 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10156 }
10157 else
10158 {
10159 if (!rtx_equal_p (operands[0], operands[1]))
10160 emit_move_insn (operands[0], operands[1]);
10161 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10162 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10163 }
10164 }
10165 else
10166 {
10167 if (!rtx_equal_p (operands[0], operands[1]))
10168 emit_move_insn (operands[0], operands[1]);
10169
10170 split_di (operands, 1, low, high);
10171
10172 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10173 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10174
10175 if (TARGET_CMOVE && scratch)
10176 {
10177 emit_move_insn (scratch, high[0]);
10178 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10179 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10180 scratch));
10181 }
10182 else
10183 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10184 }
10185 }
10186
10187 void
10188 ix86_split_lshrdi (rtx *operands, rtx scratch)
10189 {
10190 rtx low[2], high[2];
10191 int count;
10192
10193 if (GET_CODE (operands[2]) == CONST_INT)
10194 {
10195 split_di (operands, 2, low, high);
10196 count = INTVAL (operands[2]) & 63;
10197
10198 if (count >= 32)
10199 {
10200 emit_move_insn (low[0], high[1]);
10201 ix86_expand_clear (high[0]);
10202
10203 if (count > 32)
10204 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10205 }
10206 else
10207 {
10208 if (!rtx_equal_p (operands[0], operands[1]))
10209 emit_move_insn (operands[0], operands[1]);
10210 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10211 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10212 }
10213 }
10214 else
10215 {
10216 if (!rtx_equal_p (operands[0], operands[1]))
10217 emit_move_insn (operands[0], operands[1]);
10218
10219 split_di (operands, 1, low, high);
10220
10221 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10222 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10223
10224 /* Heh. By reversing the arguments, we can reuse this pattern. */
10225 if (TARGET_CMOVE && scratch)
10226 {
10227 ix86_expand_clear (scratch);
10228 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10229 scratch));
10230 }
10231 else
10232 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10233 }
10234 }
10235
10236 /* Helper function for the string operations below. Test VARIABLE for
10237 alignment: if (VARIABLE & VALUE) is zero, jump to the returned label. */
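/* A usage sketch, mirroring the callers later in this file:

       rtx label = ix86_expand_aligntest (destreg, 1);
       ... emit the code handling the odd byte ...
       emit_label (label);

   i.e. the code between the call and emit_label runs only when
   (destreg & 1) is nonzero at run time.  */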
10238 static rtx
10239 ix86_expand_aligntest (rtx variable, int value)
10240 {
10241 rtx label = gen_label_rtx ();
10242 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10243 if (GET_MODE (variable) == DImode)
10244 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10245 else
10246 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10247 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10248 1, label);
10249 return label;
10250 }
10251
10252 /* Decrease COUNTREG by VALUE. */
10253 static void
10254 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10255 {
10256 if (GET_MODE (countreg) == DImode)
10257 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10258 else
10259 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10260 }
10261
10262 /* Zero-extend EXP, which may be in SImode, into a Pmode register. */
10263 rtx
10264 ix86_zero_extend_to_Pmode (rtx exp)
10265 {
10266 rtx r;
10267 if (GET_MODE (exp) == VOIDmode)
10268 return force_reg (Pmode, exp);
10269 if (GET_MODE (exp) == Pmode)
10270 return copy_to_mode_reg (Pmode, exp);
10271 r = gen_reg_rtx (Pmode);
10272 emit_insn (gen_zero_extendsidi2 (r, exp));
10273 return r;
10274 }
10275
10276 /* Expand string move (memcpy) operation. Use i386 string operations when
10277 profitable. expand_clrmem contains similar code. */
10278 int
10279 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10280 {
10281 rtx srcreg, destreg, countreg, srcexp, destexp;
10282 enum machine_mode counter_mode;
10283 HOST_WIDE_INT align = 0;
10284 unsigned HOST_WIDE_INT count = 0;
10285
10286 if (GET_CODE (align_exp) == CONST_INT)
10287 align = INTVAL (align_exp);
10288
10289 /* Can't use any of this if the user has appropriated esi or edi. */
10290 if (global_regs[4] || global_regs[5])
10291 return 0;
10292
10293 /* This simple hack avoids all inlining code and simplifies code below. */
10294 if (!TARGET_ALIGN_STRINGOPS)
10295 align = 64;
10296
10297 if (GET_CODE (count_exp) == CONST_INT)
10298 {
10299 count = INTVAL (count_exp);
10300 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10301 return 0;
10302 }
10303
10304 /* Figure out the proper mode for the counter. For 32 bits it is always
10305 SImode; for 64 bits use SImode when possible, otherwise DImode.
10306 Set count to the number of bytes copied when known at compile time. */
10307 if (!TARGET_64BIT
10308 || GET_MODE (count_exp) == SImode
10309 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10310 counter_mode = SImode;
10311 else
10312 counter_mode = DImode;
10313
10314 if (counter_mode != SImode && counter_mode != DImode)
10315 abort ();
10316
10317 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10318 if (destreg != XEXP (dst, 0))
10319 dst = replace_equiv_address_nv (dst, destreg);
10320 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10321 if (srcreg != XEXP (src, 0))
10322 src = replace_equiv_address_nv (src, srcreg);
10323
10324 /* When optimizing for size, emit a simple rep ; movsb instruction for
10325 counts not divisible by 4. */
10326
10327 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10328 {
10329 emit_insn (gen_cld ());
10330 countreg = ix86_zero_extend_to_Pmode (count_exp);
10331 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10332 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10333 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10334 destexp, srcexp));
10335 }
10336
10337 /* For constant aligned (or small unaligned) copies use rep movsl
10338 followed by code copying the rest. For PentiumPro ensure 8 byte
10339 alignment to allow rep movsl acceleration. */
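/* A worked example: for count == 23 and size == 4 the code below emits
   rep movsl for 23 >> 2 == 5 words (20 bytes) and then copies the
   remaining 2-byte and 1-byte tails explicitly, 23 bytes in total.  */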
10340
10341 else if (count != 0
10342 && (align >= 8
10343 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10344 || optimize_size || count < (unsigned int) 64))
10345 {
10346 unsigned HOST_WIDE_INT offset = 0;
10347 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10348 rtx srcmem, dstmem;
10349
10350 emit_insn (gen_cld ());
10351 if (count & ~(size - 1))
10352 {
10353 countreg = copy_to_mode_reg (counter_mode,
10354 GEN_INT ((count >> (size == 4 ? 2 : 3))
10355 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10356 countreg = ix86_zero_extend_to_Pmode (countreg);
10357
10358 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10359 GEN_INT (size == 4 ? 2 : 3));
10360 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10361 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10362
10363 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10364 countreg, destexp, srcexp));
10365 offset = count & ~(size - 1);
10366 }
10367 if (size == 8 && (count & 0x04))
10368 {
10369 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10370 offset);
10371 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10372 offset);
10373 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10374 offset += 4;
10375 }
10376 if (count & 0x02)
10377 {
10378 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10379 offset);
10380 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10381 offset);
10382 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10383 offset += 2;
10384 }
10385 if (count & 0x01)
10386 {
10387 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10388 offset);
10389 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10390 offset);
10391 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10392 }
10393 }
10394 /* The generic code based on the glibc implementation:
10395 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10396 allowing accelerated copying there)
10397 - copy the data using rep movsl
10398 - copy the rest. */
10399 else
10400 {
10401 rtx countreg2;
10402 rtx label = NULL;
10403 rtx srcmem, dstmem;
10404 int desired_alignment = (TARGET_PENTIUMPRO
10405 && (count == 0 || count >= (unsigned int) 260)
10406 ? 8 : UNITS_PER_WORD);
10407 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10408 dst = change_address (dst, BLKmode, destreg);
10409 src = change_address (src, BLKmode, srcreg);
10410
10411 /* In case we don't know anything about the alignment, default to the
10412 library version, since it is usually equally fast and results in
10413 shorter code.
10414
10415 Also emit a call when we know that the count is large and the call
10416 overhead will not be important. */
10417 if (!TARGET_INLINE_ALL_STRINGOPS
10418 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10419 return 0;
10420
10421 if (TARGET_SINGLE_STRINGOP)
10422 emit_insn (gen_cld ());
10423
10424 countreg2 = gen_reg_rtx (Pmode);
10425 countreg = copy_to_mode_reg (counter_mode, count_exp);
10426
10427 /* We don't use loops to align the destination or to copy parts smaller
10428 than 4 bytes, because gcc is able to optimize such code better (in
10429 the case the destination or the count really is aligned, gcc is often
10430 able to predict the branches) and it is also friendlier to hardware
10431 branch prediction.
10432
10433 Using loops would be beneficial for the generic case, because we could
10434 handle small counts using the loops. Many CPUs (such as Athlon)
10435 have large REP prefix setup costs.
10436
10437 This is quite costly. Maybe we can revisit this decision later or
10438 add some customizability to this code. */
10439
10440 if (count == 0 && align < desired_alignment)
10441 {
10442 label = gen_label_rtx ();
10443 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10444 LEU, 0, counter_mode, 1, label);
10445 }
10446 if (align <= 1)
10447 {
10448 rtx label = ix86_expand_aligntest (destreg, 1);
10449 srcmem = change_address (src, QImode, srcreg);
10450 dstmem = change_address (dst, QImode, destreg);
10451 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10452 ix86_adjust_counter (countreg, 1);
10453 emit_label (label);
10454 LABEL_NUSES (label) = 1;
10455 }
10456 if (align <= 2)
10457 {
10458 rtx label = ix86_expand_aligntest (destreg, 2);
10459 srcmem = change_address (src, HImode, srcreg);
10460 dstmem = change_address (dst, HImode, destreg);
10461 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10462 ix86_adjust_counter (countreg, 2);
10463 emit_label (label);
10464 LABEL_NUSES (label) = 1;
10465 }
10466 if (align <= 4 && desired_alignment > 4)
10467 {
10468 rtx label = ix86_expand_aligntest (destreg, 4);
10469 srcmem = change_address (src, SImode, srcreg);
10470 dstmem = change_address (dst, SImode, destreg);
10471 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10472 ix86_adjust_counter (countreg, 4);
10473 emit_label (label);
10474 LABEL_NUSES (label) = 1;
10475 }
10476
10477 if (label && desired_alignment > 4 && !TARGET_64BIT)
10478 {
10479 emit_label (label);
10480 LABEL_NUSES (label) = 1;
10481 label = NULL_RTX;
10482 }
10483 if (!TARGET_SINGLE_STRINGOP)
10484 emit_insn (gen_cld ());
10485 if (TARGET_64BIT)
10486 {
10487 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10488 GEN_INT (3)));
10489 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10490 }
10491 else
10492 {
10493 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10494 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10495 }
10496 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10497 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10498 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10499 countreg2, destexp, srcexp));
10500
10501 if (label)
10502 {
10503 emit_label (label);
10504 LABEL_NUSES (label) = 1;
10505 }
10506 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10507 {
10508 srcmem = change_address (src, SImode, srcreg);
10509 dstmem = change_address (dst, SImode, destreg);
10510 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10511 }
10512 if ((align <= 4 || count == 0) && TARGET_64BIT)
10513 {
10514 rtx label = ix86_expand_aligntest (countreg, 4);
10515 srcmem = change_address (src, SImode, srcreg);
10516 dstmem = change_address (dst, SImode, destreg);
10517 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10518 emit_label (label);
10519 LABEL_NUSES (label) = 1;
10520 }
10521 if (align > 2 && count != 0 && (count & 2))
10522 {
10523 srcmem = change_address (src, HImode, srcreg);
10524 dstmem = change_address (dst, HImode, destreg);
10525 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10526 }
10527 if (align <= 2 || count == 0)
10528 {
10529 rtx label = ix86_expand_aligntest (countreg, 2);
10530 srcmem = change_address (src, HImode, srcreg);
10531 dstmem = change_address (dst, HImode, destreg);
10532 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10533 emit_label (label);
10534 LABEL_NUSES (label) = 1;
10535 }
10536 if (align > 1 && count != 0 && (count & 1))
10537 {
10538 srcmem = change_address (src, QImode, srcreg);
10539 dstmem = change_address (dst, QImode, destreg);
10540 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10541 }
10542 if (align <= 1 || count == 0)
10543 {
10544 rtx label = ix86_expand_aligntest (countreg, 1);
10545 srcmem = change_address (src, QImode, srcreg);
10546 dstmem = change_address (dst, QImode, destreg);
10547 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10548 emit_label (label);
10549 LABEL_NUSES (label) = 1;
10550 }
10551 }
10552
10553 return 1;
10554 }
10555
10556 /* Expand string clear operation (bzero). Use i386 string operations when
10557 profitable. expand_movmem contains similar code. */
10558 int
10559 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10560 {
10561 rtx destreg, zeroreg, countreg, destexp;
10562 enum machine_mode counter_mode;
10563 HOST_WIDE_INT align = 0;
10564 unsigned HOST_WIDE_INT count = 0;
10565
10566 if (GET_CODE (align_exp) == CONST_INT)
10567 align = INTVAL (align_exp);
10568
10569 /* Can't use any of this if the user has appropriated esi. */
10570 if (global_regs[4])
10571 return 0;
10572
10573 /* This simple hack avoids all inlining code and simplifies code below. */
10574 if (!TARGET_ALIGN_STRINGOPS)
10575 align = 32;
10576
10577 if (GET_CODE (count_exp) == CONST_INT)
10578 {
10579 count = INTVAL (count_exp);
10580 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10581 return 0;
10582 }
10583 /* Figure out the proper mode for the counter. For 32 bits it is always
10584 SImode; for 64 bits use SImode when possible, otherwise DImode.
10585 Set count to the number of bytes copied when known at compile time. */
10586 if (!TARGET_64BIT
10587 || GET_MODE (count_exp) == SImode
10588 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10589 counter_mode = SImode;
10590 else
10591 counter_mode = DImode;
10592
10593 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10594 if (destreg != XEXP (dst, 0))
10595 dst = replace_equiv_address_nv (dst, destreg);
10596
10597
10598 /* When optimizing for size, emit a simple rep ; stosb instruction for
10599 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10600 sequence is 7 bytes long, so if optimizing for size and the count is
10601 small enough that some stosl, stosw and stosb instructions without
10602 rep are shorter, fall back into the next if. */
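/* A worked example of that size test: for count == 35 the non-rep
   sequence would need (35 & 3) + (35 >> 2) = 3 + 8 = 11 stos
   instructions, more than the 7-byte rep form, so rep; stosb is used;
   for count == 6 it needs only 2 + 1 = 3, so we fall through to the
   unrolled code below.  */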
10603
10604 if ((!optimize || optimize_size)
10605 && (count == 0
10606 || ((count & 0x03)
10607 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10608 {
10609 emit_insn (gen_cld ());
10610
10611 countreg = ix86_zero_extend_to_Pmode (count_exp);
10612 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10613 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10614 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10615 }
10616 else if (count != 0
10617 && (align >= 8
10618 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10619 || optimize_size || count < (unsigned int) 64))
10620 {
10621 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10622 unsigned HOST_WIDE_INT offset = 0;
10623
10624 emit_insn (gen_cld ());
10625
10626 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10627 if (count & ~(size - 1))
10628 {
10629 unsigned HOST_WIDE_INT repcount;
10630 unsigned int max_nonrep;
10631
10632 repcount = count >> (size == 4 ? 2 : 3);
10633 if (!TARGET_64BIT)
10634 repcount &= 0x3fffffff;
10635
10636 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10637 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10638 bytes. In both cases the latter seems to be faster for small
10639 values of N. */
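/* E.g. clearing 12 aligned bytes with size == 4 gives repcount == 3,
   so three one-byte stosl's (3 bytes of code) beat the 7-byte
   movl $3, %ecx; rep; stosl, which is why small repcounts are
   unrolled below.  */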
10640 max_nonrep = size == 4 ? 7 : 4;
10641 if (!optimize_size)
10642 switch (ix86_tune)
10643 {
10644 case PROCESSOR_PENTIUM4:
10645 case PROCESSOR_NOCONA:
10646 max_nonrep = 3;
10647 break;
10648 default:
10649 break;
10650 }
10651
10652 if (repcount <= max_nonrep)
10653 while (repcount-- > 0)
10654 {
10655 rtx mem = adjust_automodify_address_nv (dst,
10656 GET_MODE (zeroreg),
10657 destreg, offset);
10658 emit_insn (gen_strset (destreg, mem, zeroreg));
10659 offset += size;
10660 }
10661 else
10662 {
10663 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10664 countreg = ix86_zero_extend_to_Pmode (countreg);
10665 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10666 GEN_INT (size == 4 ? 2 : 3));
10667 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10668 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10669 destexp));
10670 offset = count & ~(size - 1);
10671 }
10672 }
10673 if (size == 8 && (count & 0x04))
10674 {
10675 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10676 offset);
10677 emit_insn (gen_strset (destreg, mem,
10678 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10679 offset += 4;
10680 }
10681 if (count & 0x02)
10682 {
10683 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10684 offset);
10685 emit_insn (gen_strset (destreg, mem,
10686 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10687 offset += 2;
10688 }
10689 if (count & 0x01)
10690 {
10691 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10692 offset);
10693 emit_insn (gen_strset (destreg, mem,
10694 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10695 }
10696 }
10697 else
10698 {
10699 rtx countreg2;
10700 rtx label = NULL;
10701 /* Compute desired alignment of the string operation. */
10702 int desired_alignment = (TARGET_PENTIUMPRO
10703 && (count == 0 || count >= (unsigned int) 260)
10704 ? 8 : UNITS_PER_WORD);
10705
10706 /* In case we don't know anything about the alignment, default to the
10707 library version, since it is usually equally fast and results in
10708 shorter code.
10709
10710 Also emit a call when we know that the count is large and the call
10711 overhead will not be important. */
10712 if (!TARGET_INLINE_ALL_STRINGOPS
10713 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10714 return 0;
10715
10716 if (TARGET_SINGLE_STRINGOP)
10717 emit_insn (gen_cld ());
10718
10719 countreg2 = gen_reg_rtx (Pmode);
10720 countreg = copy_to_mode_reg (counter_mode, count_exp);
10721 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10722 /* Get rid of MEM_OFFSET, it won't be accurate. */
10723 dst = change_address (dst, BLKmode, destreg);
10724
10725 if (count == 0 && align < desired_alignment)
10726 {
10727 label = gen_label_rtx ();
10728 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10729 LEU, 0, counter_mode, 1, label);
10730 }
10731 if (align <= 1)
10732 {
10733 rtx label = ix86_expand_aligntest (destreg, 1);
10734 emit_insn (gen_strset (destreg, dst,
10735 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10736 ix86_adjust_counter (countreg, 1);
10737 emit_label (label);
10738 LABEL_NUSES (label) = 1;
10739 }
10740 if (align <= 2)
10741 {
10742 rtx label = ix86_expand_aligntest (destreg, 2);
10743 emit_insn (gen_strset (destreg, dst,
10744 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10745 ix86_adjust_counter (countreg, 2);
10746 emit_label (label);
10747 LABEL_NUSES (label) = 1;
10748 }
10749 if (align <= 4 && desired_alignment > 4)
10750 {
10751 rtx label = ix86_expand_aligntest (destreg, 4);
10752 emit_insn (gen_strset (destreg, dst,
10753 (TARGET_64BIT
10754 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10755 : zeroreg)));
10756 ix86_adjust_counter (countreg, 4);
10757 emit_label (label);
10758 LABEL_NUSES (label) = 1;
10759 }
10760
10761 if (label && desired_alignment > 4 && !TARGET_64BIT)
10762 {
10763 emit_label (label);
10764 LABEL_NUSES (label) = 1;
10765 label = NULL_RTX;
10766 }
10767
10768 if (!TARGET_SINGLE_STRINGOP)
10769 emit_insn (gen_cld ());
10770 if (TARGET_64BIT)
10771 {
10772 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10773 GEN_INT (3)));
10774 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10775 }
10776 else
10777 {
10778 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10779 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10780 }
10781 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10782 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
10783
10784 if (label)
10785 {
10786 emit_label (label);
10787 LABEL_NUSES (label) = 1;
10788 }
10789
10790 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10791 emit_insn (gen_strset (destreg, dst,
10792 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10793 if (TARGET_64BIT && (align <= 4 || count == 0))
10794 {
10795 rtx label = ix86_expand_aligntest (countreg, 4);
10796 emit_insn (gen_strset (destreg, dst,
10797 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10798 emit_label (label);
10799 LABEL_NUSES (label) = 1;
10800 }
10801 if (align > 2 && count != 0 && (count & 2))
10802 emit_insn (gen_strset (destreg, dst,
10803 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10804 if (align <= 2 || count == 0)
10805 {
10806 rtx label = ix86_expand_aligntest (countreg, 2);
10807 emit_insn (gen_strset (destreg, dst,
10808 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10809 emit_label (label);
10810 LABEL_NUSES (label) = 1;
10811 }
10812 if (align > 1 && count != 0 && (count & 1))
10813 emit_insn (gen_strset (destreg, dst,
10814 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10815 if (align <= 1 || count == 0)
10816 {
10817 rtx label = ix86_expand_aligntest (countreg, 1);
10818 emit_insn (gen_strset (destreg, dst,
10819 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10820 emit_label (label);
10821 LABEL_NUSES (label) = 1;
10822 }
10823 }
10824 return 1;
10825 }
10826
10827 /* Expand strlen. */
10828 int
10829 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
10830 {
10831 rtx addr, scratch1, scratch2, scratch3, scratch4;
10832
10833 /* The generic case of the strlen expander is long. Avoid expanding
10834 it unless TARGET_INLINE_ALL_STRINGOPS. */
10835
10836 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10837 && !TARGET_INLINE_ALL_STRINGOPS
10838 && !optimize_size
10839 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10840 return 0;
10841
10842 addr = force_reg (Pmode, XEXP (src, 0));
10843 scratch1 = gen_reg_rtx (Pmode);
10844
10845 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10846 && !optimize_size)
10847 {
10848 /* Well, it seems that some optimizer does not combine a call like
10849 foo(strlen(bar), strlen(bar));
10850 when the move and the subtraction are done here. It does calculate
10851 the length just once when these instructions are done inside
10852 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
10853 often used and I use one fewer register for the lifetime of
10854 output_strlen_unroll(), this is better. */
10855
10856 emit_move_insn (out, addr);
10857
10858 ix86_expand_strlensi_unroll_1 (out, src, align);
10859
10860 /* strlensi_unroll_1 returns the address of the zero at the end of
10861 the string, like memchr(), so compute the length by subtracting
10862 the start address. */
10863 if (TARGET_64BIT)
10864 emit_insn (gen_subdi3 (out, out, addr));
10865 else
10866 emit_insn (gen_subsi3 (out, out, addr));
10867 }
10868 else
10869 {
10870 rtx unspec;
10871 scratch2 = gen_reg_rtx (Pmode);
10872 scratch3 = gen_reg_rtx (Pmode);
10873 scratch4 = force_reg (Pmode, constm1_rtx);
10874
10875 emit_move_insn (scratch3, addr);
10876 eoschar = force_reg (QImode, eoschar);
10877
10878 emit_insn (gen_cld ());
10879 src = replace_equiv_address_nv (src, scratch3);
10880
10881 /* If .md starts supporting :P, this can be done in .md. */
10882 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
10883 scratch4), UNSPEC_SCAS);
10884 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
10885 if (TARGET_64BIT)
10886 {
10887 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10888 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10889 }
10890 else
10891 {
10892 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10893 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10894 }
10895 }
10896 return 1;
10897 }
10898
10899 /* Expand the appropriate insns for doing strlen if not just doing
10900 repnz; scasb
10901
10902 out = result, initialized with the start address
10903 align_rtx = alignment of the address.
10904 scratch = scratch register, initialized with the start address when
10905 not aligned, otherwise undefined
10906
10907 This is just the body. It needs the initializations mentioned above and
10908 some address computing at the end. These things are done in i386.md. */
10909
10910 static void
10911 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
10912 {
10913 int align;
10914 rtx tmp;
10915 rtx align_2_label = NULL_RTX;
10916 rtx align_3_label = NULL_RTX;
10917 rtx align_4_label = gen_label_rtx ();
10918 rtx end_0_label = gen_label_rtx ();
10919 rtx mem;
10920 rtx tmpreg = gen_reg_rtx (SImode);
10921 rtx scratch = gen_reg_rtx (SImode);
10922 rtx cmp;
10923
10924 align = 0;
10925 if (GET_CODE (align_rtx) == CONST_INT)
10926 align = INTVAL (align_rtx);
10927
10928 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10929
10930 /* Is there a known alignment and is it less than 4? */
10931 if (align < 4)
10932 {
10933 rtx scratch1 = gen_reg_rtx (Pmode);
10934 emit_move_insn (scratch1, out);
10935 /* Is there a known alignment and is it not 2? */
10936 if (align != 2)
10937 {
10938 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10939 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10940
10941 /* Leave just the 3 lower bits. */
10942 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10943 NULL_RTX, 0, OPTAB_WIDEN);
10944
10945 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10946 Pmode, 1, align_4_label);
10947 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
10948 Pmode, 1, align_2_label);
10949 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
10950 Pmode, 1, align_3_label);
10951 }
10952 else
10953 {
10954 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10955 check whether it is aligned to a 4-byte boundary. */
10956
10957 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
10958 NULL_RTX, 0, OPTAB_WIDEN);
10959
10960 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10961 Pmode, 1, align_4_label);
10962 }
10963
10964 mem = change_address (src, QImode, out);
10965
10966 /* Now compare the bytes. */
10967
10968 /* Compare the first n unaligned bytes, one byte at a time. */
10969 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10970 QImode, 1, end_0_label);
10971
10972 /* Increment the address. */
10973 if (TARGET_64BIT)
10974 emit_insn (gen_adddi3 (out, out, const1_rtx));
10975 else
10976 emit_insn (gen_addsi3 (out, out, const1_rtx));
10977
10978 /* Not needed with an alignment of 2 */
10979 if (align != 2)
10980 {
10981 emit_label (align_2_label);
10982
10983 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10984 end_0_label);
10985
10986 if (TARGET_64BIT)
10987 emit_insn (gen_adddi3 (out, out, const1_rtx));
10988 else
10989 emit_insn (gen_addsi3 (out, out, const1_rtx));
10990
10991 emit_label (align_3_label);
10992 }
10993
10994 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10995 end_0_label);
10996
10997 if (TARGET_64BIT)
10998 emit_insn (gen_adddi3 (out, out, const1_rtx));
10999 else
11000 emit_insn (gen_addsi3 (out, out, const1_rtx));
11001 }
11002
11003 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11004 align this loop; it only enlarges the program and does not help
11005 to speed it up. */
11006 emit_label (align_4_label);
11007
11008 mem = change_address (src, SImode, out);
11009 emit_move_insn (scratch, mem);
11010 if (TARGET_64BIT)
11011 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11012 else
11013 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11014
11015 /* This formula yields a nonzero result iff one of the bytes is zero.
11016 This saves three branches inside the loop and many cycles. */
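/* A stand-alone C rendering of that formula (assumes a 32-bit
   unsigned int; names are illustrative):

       static int word_has_zero_byte (unsigned int w)
       {
         return ((w - 0x01010101U) & ~w & 0x80808080U) != 0;
       }

   e.g. it is nonzero for 0x41420043 (which contains a zero byte) and
   zero for 0x41424344 (which does not).  */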
11017
11018 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11019 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11020 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11021 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11022 gen_int_mode (0x80808080, SImode)));
11023 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11024 align_4_label);
11025
11026 if (TARGET_CMOVE)
11027 {
11028 rtx reg = gen_reg_rtx (SImode);
11029 rtx reg2 = gen_reg_rtx (Pmode);
11030 emit_move_insn (reg, tmpreg);
11031 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11032
11033 /* If zero is not in the first two bytes, move two bytes forward. */
11034 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11035 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11036 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11037 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11038 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11039 reg,
11040 tmpreg)));
11041 /* Emit lea manually to avoid clobbering of flags. */
11042 emit_insn (gen_rtx_SET (SImode, reg2,
11043 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11044
11045 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11046 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11047 emit_insn (gen_rtx_SET (VOIDmode, out,
11048 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11049 reg2,
11050 out)));
11051
11052 }
11053 else
11054 {
11055 rtx end_2_label = gen_label_rtx ();
11056 /* Is zero in the first two bytes? */
11057
11058 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11059 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11060 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11061 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11062 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11063 pc_rtx);
11064 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11065 JUMP_LABEL (tmp) = end_2_label;
11066
11067 /* Not in the first two. Move two bytes forward. */
11068 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11069 if (TARGET_64BIT)
11070 emit_insn (gen_adddi3 (out, out, const2_rtx));
11071 else
11072 emit_insn (gen_addsi3 (out, out, const2_rtx));
11073
11074 emit_label (end_2_label);
11075
11076 }
11077
11078 /* Avoid branch in fixing the byte. */
11079 tmpreg = gen_lowpart (QImode, tmpreg);
11080 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11081 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
11082 if (TARGET_64BIT)
11083 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11084 else
11085 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11086
11087 emit_label (end_0_label);
11088 }
11089
11090 void
11091 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11092 rtx callarg2 ATTRIBUTE_UNUSED,
11093 rtx pop, int sibcall)
11094 {
11095 rtx use = NULL, call;
11096
11097 if (pop == const0_rtx)
11098 pop = NULL;
11099 if (TARGET_64BIT && pop)
11100 abort ();
11101
11102 #if TARGET_MACHO
11103 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11104 fnaddr = machopic_indirect_call_target (fnaddr);
11105 #else
11106 /* Static functions and indirect calls don't need the pic register. */
11107 if (! TARGET_64BIT && flag_pic
11108 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11109 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11110 use_reg (&use, pic_offset_table_rtx);
11111
11112 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11113 {
11114 rtx al = gen_rtx_REG (QImode, 0);
11115 emit_move_insn (al, callarg2);
11116 use_reg (&use, al);
11117 }
11118 #endif /* TARGET_MACHO */
11119
11120 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11121 {
11122 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11123 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11124 }
11125 if (sibcall && TARGET_64BIT
11126 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11127 {
11128 rtx addr;
11129 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11130 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11131 emit_move_insn (fnaddr, addr);
11132 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11133 }
11134
11135 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11136 if (retval)
11137 call = gen_rtx_SET (VOIDmode, retval, call);
11138 if (pop)
11139 {
11140 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11141 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11142 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11143 }
11144
11145 call = emit_call_insn (call);
11146 if (use)
11147 CALL_INSN_FUNCTION_USAGE (call) = use;
11148 }
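/* Roughly, the call insn constructed above has the shape (illustrative
   sketch, not a verbatim RTL dump):

     (call (mem:QI (symbol_ref "foo")) (const_int N))

   wrapped in a SET when there is a return value, and combined with
   (set (reg sp) (plus (reg sp) (const_int M))) in a PARALLEL when the
   callee pops M bytes of arguments.  */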
11149
11150 \f
11151 /* Clear stack slot assignments remembered from previous functions.
11152 This is called from INIT_EXPANDERS once before RTL is emitted for each
11153 function. */
11154
11155 static struct machine_function *
11156 ix86_init_machine_status (void)
11157 {
11158 struct machine_function *f;
11159
11160 f = ggc_alloc_cleared (sizeof (struct machine_function));
11161 f->use_fast_prologue_epilogue_nregs = -1;
11162
11163 return f;
11164 }
11165
11166 /* Return a MEM corresponding to a stack slot with mode MODE.
11167 Allocate a new slot if necessary.
11168
11169 The RTL for a function can have several slots available: N is
11170 which slot to use. */
11171
11172 rtx
11173 assign_386_stack_local (enum machine_mode mode, int n)
11174 {
11175 struct stack_local_entry *s;
11176
11177 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11178 abort ();
11179
11180 for (s = ix86_stack_locals; s; s = s->next)
11181 if (s->mode == mode && s->n == n)
11182 return s->rtl;
11183
11184 s = (struct stack_local_entry *)
11185 ggc_alloc (sizeof (struct stack_local_entry));
11186 s->n = n;
11187 s->mode = mode;
11188 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11189
11190 s->next = ix86_stack_locals;
11191 ix86_stack_locals = s;
11192 return s->rtl;
11193 }
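/* Usage sketch (illustrative): callers elsewhere in this file obtain,
   e.g., an HImode scratch slot for saving the FPU control word by
   calling assign_386_stack_local (HImode, n) with a small fixed n;
   a repeated call with the same MODE and N returns the cached MEM
   rather than allocating a new slot.  */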
11194
11195 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11196
11197 static GTY(()) rtx ix86_tls_symbol;
11198 rtx
11199 ix86_tls_get_addr (void)
11200 {
11201
11202 if (!ix86_tls_symbol)
11203 {
11204 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11205 (TARGET_GNU_TLS && !TARGET_64BIT)
11206 ? "___tls_get_addr"
11207 : "__tls_get_addr");
11208 }
11209
11210 return ix86_tls_symbol;
11211 }
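/* Background note (illustrative, defined by the TLS ABI rather than by
   this file): the "___tls_get_addr" variant selected for GNU TLS on
   IA-32 is understood to take its argument in a register (%eax) rather
   than on the stack, unlike the standard "__tls_get_addr"; that
   calling-convention difference is why a distinct symbol is built here.  */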
11212 \f
11213 /* Calculate the length of the memory address in the instruction
11214 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11215
11216 int
11217 memory_address_length (rtx addr)
11218 {
11219 struct ix86_address parts;
11220 rtx base, index, disp;
11221 int len;
11222
11223 if (GET_CODE (addr) == PRE_DEC
11224 || GET_CODE (addr) == POST_INC
11225 || GET_CODE (addr) == PRE_MODIFY
11226 || GET_CODE (addr) == POST_MODIFY)
11227 return 0;
11228
11229 if (! ix86_decompose_address (addr, &parts))
11230 abort ();
11231
11232 base = parts.base;
11233 index = parts.index;
11234 disp = parts.disp;
11235 len = 0;
11236
11237 /* Rule of thumb:
11238 - esp as the base always wants an index,
11239 - ebp as the base always wants a displacement. */
11240
11241 /* Register Indirect. */
11242 if (base && !index && !disp)
11243 {
11244 /* esp (for its index) and ebp (for its displacement) need
11245 the two-byte modrm form. */
11246 if (addr == stack_pointer_rtx
11247 || addr == arg_pointer_rtx
11248 || addr == frame_pointer_rtx
11249 || addr == hard_frame_pointer_rtx)
11250 len = 1;
11251 }
11252
11253 /* Direct Addressing. */
11254 else if (disp && !base && !index)
11255 len = 4;
11256
11257 else
11258 {
11259 /* Find the length of the displacement constant. */
11260 if (disp)
11261 {
11262 if (GET_CODE (disp) == CONST_INT
11263 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11264 && base)
11265 len = 1;
11266 else
11267 len = 4;
11268 }
11269 /* ebp always wants a displacement. */
11270 else if (base == hard_frame_pointer_rtx)
11271 len = 1;
11272
11273 /* An index requires the two-byte modrm form.... */
11274 if (index
11275 /* ...like esp, which always wants an index. */
11276 || base == stack_pointer_rtx
11277 || base == arg_pointer_rtx
11278 || base == frame_pointer_rtx)
11279 len += 1;
11280 }
11281
11282 return len;
11283 }
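/* Worked examples (illustrative, using the rules above): (reg %ebx)
   needs no extra bytes (0); plain (reg %esp) or (reg %ebp) needs the
   two-byte form (1); (plus (reg %ebx) (const_int 8)) fits an 8-bit
   displacement (1); a bare (symbol_ref) uses a full 32-bit displacement
   (4).  All counts exclude the modrm byte itself.  */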
11284
11285 /* Compute the default value for the "length_immediate" attribute. When
11286 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
11287 int
11288 ix86_attr_length_immediate_default (rtx insn, int shortform)
11289 {
11290 int len = 0;
11291 int i;
11292 extract_insn_cached (insn);
11293 for (i = recog_data.n_operands - 1; i >= 0; --i)
11294 if (CONSTANT_P (recog_data.operand[i]))
11295 {
11296 if (len)
11297 abort ();
11298 if (shortform
11299 && GET_CODE (recog_data.operand[i]) == CONST_INT
11300 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11301 len = 1;
11302 else
11303 {
11304 switch (get_attr_mode (insn))
11305 {
11306 case MODE_QI:
11307 len += 1;
11308 break;
11309 case MODE_HI:
11310 len += 2;
11311 break;
11312 case MODE_SI:
11313 len += 4;
11314 break;
11315 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11316 case MODE_DI:
11317 len += 4;
11318 break;
11319 default:
11320 fatal_insn ("unknown insn mode", insn);
11321 }
11322 }
11323 }
11324 return len;
11325 }
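/* Example (illustrative): for an SImode add of the constant 100 the
   value fits the signed 8-bit 'K' range, so with a short-form
   alternative the immediate counts as 1 byte; the constant 100000 does
   not fit and counts as the full 4 bytes.  */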
11326 /* Compute default value for "length_address" attribute. */
11327 int
11328 ix86_attr_length_address_default (rtx insn)
11329 {
11330 int i;
11331
11332 if (get_attr_type (insn) == TYPE_LEA)
11333 {
11334 rtx set = PATTERN (insn);
11335 if (GET_CODE (set) == SET)
11336 ;
11337 else if (GET_CODE (set) == PARALLEL
11338 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11339 set = XVECEXP (set, 0, 0);
11340 else
11341 {
11342 #ifdef ENABLE_CHECKING
11343 abort ();
11344 #endif
11345 return 0;
11346 }
11347
11348 return memory_address_length (SET_SRC (set));
11349 }
11350
11351 extract_insn_cached (insn);
11352 for (i = recog_data.n_operands - 1; i >= 0; --i)
11353 if (GET_CODE (recog_data.operand[i]) == MEM)
11354 {
11355 return memory_address_length (XEXP (recog_data.operand[i], 0));
11357 }
11358 return 0;
11359 }
11360 \f
11361 /* Return the maximum number of instructions a cpu can issue. */
11362
11363 static int
11364 ix86_issue_rate (void)
11365 {
11366 switch (ix86_tune)
11367 {
11368 case PROCESSOR_PENTIUM:
11369 case PROCESSOR_K6:
11370 return 2;
11371
11372 case PROCESSOR_PENTIUMPRO:
11373 case PROCESSOR_PENTIUM4:
11374 case PROCESSOR_ATHLON:
11375 case PROCESSOR_K8:
11376 case PROCESSOR_NOCONA:
11377 return 3;
11378
11379 default:
11380 return 1;
11381 }
11382 }
11383
11384 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11385 set by DEP_INSN and nothing else that DEP_INSN sets. */
11386
11387 static int
11388 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11389 {
11390 rtx set, set2;
11391
11392 /* Simplify the test for uninteresting insns. */
11393 if (insn_type != TYPE_SETCC
11394 && insn_type != TYPE_ICMOV
11395 && insn_type != TYPE_FCMOV
11396 && insn_type != TYPE_IBR)
11397 return 0;
11398
11399 if ((set = single_set (dep_insn)) != 0)
11400 {
11401 set = SET_DEST (set);
11402 set2 = NULL_RTX;
11403 }
11404 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11405 && XVECLEN (PATTERN (dep_insn), 0) == 2
11406 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11407 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11408 {
11409 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11410 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11411 }
11412 else
11413 return 0;
11414
11415 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11416 return 0;
11417
11418 /* This test is true if the dependent insn reads the flags but
11419 not any other potentially set register. */
11420 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11421 return 0;
11422
11423 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11424 return 0;
11425
11426 return 1;
11427 }
11428
11429 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11430 address with operands set by DEP_INSN. */
11431
11432 static int
11433 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11434 {
11435 rtx addr;
11436
11437 if (insn_type == TYPE_LEA
11438 && TARGET_PENTIUM)
11439 {
11440 addr = PATTERN (insn);
11441 if (GET_CODE (addr) == SET)
11442 ;
11443 else if (GET_CODE (addr) == PARALLEL
11444 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11445 addr = XVECEXP (addr, 0, 0);
11446 else
11447 abort ();
11448 addr = SET_SRC (addr);
11449 }
11450 else
11451 {
11452 int i;
11453 extract_insn_cached (insn);
11454 for (i = recog_data.n_operands - 1; i >= 0; --i)
11455 if (GET_CODE (recog_data.operand[i]) == MEM)
11456 {
11457 addr = XEXP (recog_data.operand[i], 0);
11458 goto found;
11459 }
11460 return 0;
11461 found:;
11462 }
11463
11464 return modified_in_p (addr, dep_insn);
11465 }
11466
11467 static int
11468 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11469 {
11470 enum attr_type insn_type, dep_insn_type;
11471 enum attr_memory memory;
11472 rtx set, set2;
11473 int dep_insn_code_number;
11474
11475 /* Anti and output dependencies have zero cost on all CPUs. */
11476 if (REG_NOTE_KIND (link) != 0)
11477 return 0;
11478
11479 dep_insn_code_number = recog_memoized (dep_insn);
11480
11481 /* If we can't recognize the insns, we can't really do anything. */
11482 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11483 return cost;
11484
11485 insn_type = get_attr_type (insn);
11486 dep_insn_type = get_attr_type (dep_insn);
11487
11488 switch (ix86_tune)
11489 {
11490 case PROCESSOR_PENTIUM:
11491 /* Address Generation Interlock adds a cycle of latency. */
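/* Illustrative example: "addl $4, %esi" immediately followed by
   "movl (%esi), %eax" pays this extra cycle, because the load's address
   depends on the just-written %esi.  */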
11492 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11493 cost += 1;
11494
11495 /* ??? Compares pair with jump/setcc. */
11496 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11497 cost = 0;
11498
11499 /* Floating point stores require the value to be ready one cycle earlier. */
11500 if (insn_type == TYPE_FMOV
11501 && get_attr_memory (insn) == MEMORY_STORE
11502 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11503 cost += 1;
11504 break;
11505
11506 case PROCESSOR_PENTIUMPRO:
11507 memory = get_attr_memory (insn);
11508
11509 /* INT->FP conversion is expensive. */
11510 if (get_attr_fp_int_src (dep_insn))
11511 cost += 5;
11512
11513 /* There is one cycle extra latency between an FP op and a store. */
11514 if (insn_type == TYPE_FMOV
11515 && (set = single_set (dep_insn)) != NULL_RTX
11516 && (set2 = single_set (insn)) != NULL_RTX
11517 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11518 && GET_CODE (SET_DEST (set2)) == MEM)
11519 cost += 1;
11520
11521 /* Account for the reorder buffer's ability to hide the latency of a load
11522 by executing it in parallel with the previous instruction, provided the
11523 previous instruction is not needed to compute the address. */
11524 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11525 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11526 {
11527 /* Claim that moves take one cycle, as the core can issue one load at a
11528 time and the next load can start a cycle later. */
11529 if (dep_insn_type == TYPE_IMOV
11530 || dep_insn_type == TYPE_FMOV)
11531 cost = 1;
11532 else if (cost > 1)
11533 cost--;
11534 }
11535 break;
11536
11537 case PROCESSOR_K6:
11538 memory = get_attr_memory (insn);
11539
11540 /* The esp dependency is resolved before the instruction is really
11541 finished. */
11542 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11543 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11544 return 1;
11545
11546 /* INT->FP conversion is expensive. */
11547 if (get_attr_fp_int_src (dep_insn))
11548 cost += 5;
11549
11550 /* Account for the reorder buffer's ability to hide the latency of a load
11551 by executing it in parallel with the previous instruction, provided the
11552 previous instruction is not needed to compute the address. */
11553 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11554 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11555 {
11556 /* Claim that moves take one cycle, as the core can issue one load at a
11557 time and the next load can start a cycle later. */
11558 if (dep_insn_type == TYPE_IMOV
11559 || dep_insn_type == TYPE_FMOV)
11560 cost = 1;
11561 else if (cost > 2)
11562 cost -= 2;
11563 else
11564 cost = 1;
11565 }
11566 break;
11567
11568 case PROCESSOR_ATHLON:
11569 case PROCESSOR_K8:
11570 memory = get_attr_memory (insn);
11571
11572 /* Account for the reorder buffer's ability to hide the latency of a load
11573 by executing it in parallel with the previous instruction, provided the
11574 previous instruction is not needed to compute the address. */
11575 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11576 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11577 {
11578 enum attr_unit unit = get_attr_unit (insn);
11579 int loadcost = 3;
11580
11581 /* Because of the difference between the length of the integer and
11582 floating-point unit pipeline preparation stages, memory operands
11583 for floating point are cheaper.
11584
11585 ??? For Athlon the difference is most probably 2. */
11586 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11587 loadcost = 3;
11588 else
11589 loadcost = TARGET_ATHLON ? 2 : 0;
11590
11591 if (cost >= loadcost)
11592 cost -= loadcost;
11593 else
11594 cost = 0;
11595 }
11596
11597 default:
11598 break;
11599 }
11600
11601 return cost;
11602 }
11603
11604 /* How many alternative schedules to try. This should be as wide as the
11605 scheduling freedom in the DFA, but no wider. Making this value too
11606 large results in extra work for the scheduler. */
11607
11608 static int
11609 ia32_multipass_dfa_lookahead (void)
11610 {
11611 if (ix86_tune == PROCESSOR_PENTIUM)
11612 return 2;
11613
11614 if (ix86_tune == PROCESSOR_PENTIUMPRO
11615 || ix86_tune == PROCESSOR_K6)
11616 return 1;
11617
11618 else
11619 return 0;
11620 }
11621
11622 \f
11623 /* Implement the target hook targetm.vectorize.misaligned_mem_ok. */
11624
11625 static bool
11626 ix86_misaligned_mem_ok (enum machine_mode mode)
11627 {
11628 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
11629 return true;
11630 else
11631 return false;
11632 }
11633
11634 /* Compute the alignment given to a constant that is being placed in memory.
11635 EXP is the constant and ALIGN is the alignment that the object would
11636 ordinarily have.
11637 The value of this function is used instead of that alignment to align
11638 the object. */
11639
11640 int
11641 ix86_constant_alignment (tree exp, int align)
11642 {
11643 if (TREE_CODE (exp) == REAL_CST)
11644 {
11645 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11646 return 64;
11647 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11648 return 128;
11649 }
11650 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11651 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11652 return BITS_PER_WORD;
11653
11654 return align;
11655 }
11656
11657 /* Compute the alignment for a static variable.
11658 TYPE is the data type, and ALIGN is the alignment that
11659 the object would ordinarily have. The value of this function is used
11660 instead of that alignment to align the object. */
11661
11662 int
11663 ix86_data_alignment (tree type, int align)
11664 {
11665 if (AGGREGATE_TYPE_P (type)
11666 && TYPE_SIZE (type)
11667 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11668 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11669 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11670 return 256;
11671
11672 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
11673 to a 16-byte boundary. */
11674 if (TARGET_64BIT)
11675 {
11676 if (AGGREGATE_TYPE_P (type)
11677 && TYPE_SIZE (type)
11678 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11679 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11680 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11681 return 128;
11682 }
11683
11684 if (TREE_CODE (type) == ARRAY_TYPE)
11685 {
11686 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11687 return 64;
11688 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11689 return 128;
11690 }
11691 else if (TREE_CODE (type) == COMPLEX_TYPE)
11692 {
11693
11694 if (TYPE_MODE (type) == DCmode && align < 64)
11695 return 64;
11696 if (TYPE_MODE (type) == XCmode && align < 128)
11697 return 128;
11698 }
11699 else if ((TREE_CODE (type) == RECORD_TYPE
11700 || TREE_CODE (type) == UNION_TYPE
11701 || TREE_CODE (type) == QUAL_UNION_TYPE)
11702 && TYPE_FIELDS (type))
11703 {
11704 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11705 return 64;
11706 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11707 return 128;
11708 }
11709 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11710 || TREE_CODE (type) == INTEGER_TYPE)
11711 {
11712 if (TYPE_MODE (type) == DFmode && align < 64)
11713 return 64;
11714 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11715 return 128;
11716 }
11717
11718 return align;
11719 }
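/* Illustrative examples of the rules above: a static array of doubles
   is raised to 64-bit alignment; a 24-byte aggregate on x86-64 is
   raised to a 128-bit boundary; an aggregate of 256 bits or more is
   raised to 256-bit alignment on either target.  */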
11720
11721 /* Compute the alignment for a local variable.
11722 TYPE is the data type, and ALIGN is the alignment that
11723 the object would ordinarily have. The value of this macro is used
11724 instead of that alignment to align the object. */
11725
11726 int
11727 ix86_local_alignment (tree type, int align)
11728 {
11729 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
11730 to a 16-byte boundary. */
11731 if (TARGET_64BIT)
11732 {
11733 if (AGGREGATE_TYPE_P (type)
11734 && TYPE_SIZE (type)
11735 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11736 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11737 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11738 return 128;
11739 }
11740 if (TREE_CODE (type) == ARRAY_TYPE)
11741 {
11742 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11743 return 64;
11744 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11745 return 128;
11746 }
11747 else if (TREE_CODE (type) == COMPLEX_TYPE)
11748 {
11749 if (TYPE_MODE (type) == DCmode && align < 64)
11750 return 64;
11751 if (TYPE_MODE (type) == XCmode && align < 128)
11752 return 128;
11753 }
11754 else if ((TREE_CODE (type) == RECORD_TYPE
11755 || TREE_CODE (type) == UNION_TYPE
11756 || TREE_CODE (type) == QUAL_UNION_TYPE)
11757 && TYPE_FIELDS (type))
11758 {
11759 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11760 return 64;
11761 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11762 return 128;
11763 }
11764 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11765 || TREE_CODE (type) == INTEGER_TYPE)
11766 {
11767
11768 if (TYPE_MODE (type) == DFmode && align < 64)
11769 return 64;
11770 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11771 return 128;
11772 }
11773 return align;
11774 }
11775 \f
11776 /* Emit RTL insns to initialize the variable parts of a trampoline.
11777 FNADDR is an RTX for the address of the function's pure code.
11778 CXT is an RTX for the static chain value for the function. */
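/* For reference (illustrative sketch), the bytes stored below decode as:

   32-bit:  b9 <cxt:4>          movl   $cxt, %ecx
            e9 <disp:4>         jmp    fnaddr          (pc-relative)

   64-bit:  41 bb <fnaddr:4>    movl   $fnaddr, %r11d  (short form)
        or  49 bb <fnaddr:8>    movabs $fnaddr, %r11
            49 ba <cxt:8>       movabs $cxt, %r10
            49 ff e3            jmpq   *%r11  */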
11779 void
11780 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11781 {
11782 if (!TARGET_64BIT)
11783 {
11784 /* Compute offset from the end of the jmp to the target function. */
11785 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11786 plus_constant (tramp, 10),
11787 NULL_RTX, 1, OPTAB_DIRECT);
11788 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11789 gen_int_mode (0xb9, QImode));
11790 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11791 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11792 gen_int_mode (0xe9, QImode));
11793 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11794 }
11795 else
11796 {
11797 int offset = 0;
11798 /* Try to load the address using the shorter movl instead of movabs.
11799 We may want to support movq for kernel mode, but the kernel does not
11800 use trampolines at the moment. */
11801 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
11802 {
11803 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11804 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11805 gen_int_mode (0xbb41, HImode));
11806 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11807 gen_lowpart (SImode, fnaddr));
11808 offset += 6;
11809 }
11810 else
11811 {
11812 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11813 gen_int_mode (0xbb49, HImode));
11814 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11815 fnaddr);
11816 offset += 10;
11817 }
11818 /* Load static chain using movabs to r10. */
11819 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11820 gen_int_mode (0xba49, HImode));
11821 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11822 cxt);
11823 offset += 10;
11824 /* Jump to r11. */
11825 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11826 gen_int_mode (0xff49, HImode));
11827 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11828 gen_int_mode (0xe3, QImode));
11829 offset += 3;
11830 if (offset > TRAMPOLINE_SIZE)
11831 abort ();
11832 }
11833
11834 #ifdef ENABLE_EXECUTE_STACK
11835 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
11836 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11837 #endif
11838 }
11839 \f
11840 #define def_builtin(MASK, NAME, TYPE, CODE) \
11841 do { \
11842 if ((MASK) & target_flags \
11843 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
11844 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11845 NULL, NULL_TREE); \
11846 } while (0)
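/* For example (illustrative, mirroring the table-driven registrations
   made later in this file):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when SSE is enabled; since MASK_SSE does
   not include MASK_64BIT, the registration is independent of -m64.  */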
11847
11848 struct builtin_description
11849 {
11850 const unsigned int mask;
11851 const enum insn_code icode;
11852 const char *const name;
11853 const enum ix86_builtins code;
11854 const enum rtx_code comparison;
11855 const unsigned int flag;
11856 };
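/* Reading guide (illustrative): each entry in the tables below ties a
   target mask and an insn pattern to a builtin.  E.g. the first
   bdesc_comi row registers "__builtin_ia32_comieq" as
   IX86_BUILTIN_COMIEQSS, expanded through CODE_FOR_sse_comi with UNEQ
   as the comparison code, whenever MASK_SSE is enabled; the last field
   is an extra per-entry flag consumed by the expanders.  */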
11857
11858 static const struct builtin_description bdesc_comi[] =
11859 {
11860 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11861 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11862 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11863 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11864 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11865 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11866 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11867 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11868 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11869 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11870 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11871 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11872 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11873 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11874 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11875 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11876 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11877 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11878 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11879 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11880 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11881 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11882 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11883 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11884 };
11885
11886 static const struct builtin_description bdesc_2arg[] =
11887 {
11888 /* SSE */
11889 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11890 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11891 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11892 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11893 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11894 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11895 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11896 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11897
11898 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11899 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11900 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11901 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11902 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11903 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11904 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11905 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11906 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11907 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11908 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11909 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11910 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11911 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11912 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11913 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11914 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11915 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11916 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11917 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11918
11919 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11920 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11921 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11922 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11923
11924 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11925 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11926 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11927 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11928
11929 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11930 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11931 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11932 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11933 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11934
11935 /* MMX */
11936 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11937 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11938 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11939 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
11940 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11941 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11942 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11943 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
11944
11945 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11946 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11947 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11948 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11949 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11950 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11951 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11952 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11953
11954 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11955 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11956 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11957
11958 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11959 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11960 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11961 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11962
11963 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11965
11966 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11967 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11968 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11969 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11970 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11971 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11972
11973 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11974 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11975 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11976 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11977
11978 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11979 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11980 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11981 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11982 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11983 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11984
11985 /* Special. */
11986 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11987 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11988 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11989
11990 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11991 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11992 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
11993
11994 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11995 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11996 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11997 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11998 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11999 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12000
12001 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12002 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12003 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12004 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12005 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12006 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12007
12008 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12009 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12010 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12011 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12012
12013 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12014 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12015
12016 /* SSE2 */
12017 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12018 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12019 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12020 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12021 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12022 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12023 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12024 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12025
12026 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12027 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12028 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12029 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12030 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12031 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12032 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12033 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12034 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12035 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12036 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12037 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12038 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12039 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12040 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12041 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12042 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12043 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12044 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12045 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12046
12047 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12048 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12050 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12051
12052 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12053 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12055 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12056
12057 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12058 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12059 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12060
12061 /* SSE2 MMX */
12062 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12063 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12064 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12065 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12066 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12067 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12068 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12069 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12070
12071 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12072 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12073 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12074 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12075 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12076 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12077 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12078 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12079
12080 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12081 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12082
12083 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12084 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12085 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12086 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12087
12088 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12090
12091 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12092 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12095 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12096 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12097
12098 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12099 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12100 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12101 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12102
12103 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12104 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12105 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12109 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12110 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12111
12112 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12113 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12114 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12115
12116 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12117 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12118
12119 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12120 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12121
12122 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12124 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12125 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12126 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12128
12129 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12130 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12133 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12134 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12135
12136 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12137 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12138 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12139 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12140
12141 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12142
12143 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12144 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12145 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12146 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12147
12148 /* SSE3 MMX */
12149 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12150 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12151 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12152 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12153 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12154 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12155 };
12156
12157 static const struct builtin_description bdesc_1arg[] =
12158 {
12159 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12160 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12161
12162 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12163 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12164 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12165
12166 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12167 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12168 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12169 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12170 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12171 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12172
12173 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12174 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12175 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12176 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12177
12178 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12179
12180 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12181 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12182
12183 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12184 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12185 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12186 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12187 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12188
12189 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12190
12191 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12192 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12193 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12194 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12195
12196 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12197 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12198 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12199
12200 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12201
12202 /* SSE3 */
12203 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12204 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12205 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12206 };
12207
12208 void
12209 ix86_init_builtins (void)
12210 {
12211 if (TARGET_MMX)
12212 ix86_init_mmx_sse_builtins ();
12213 }
12214
12215 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12216 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12217 builtins. */
12218 static void
12219 ix86_init_mmx_sse_builtins (void)
12220 {
12221 const struct builtin_description * d;
12222 size_t i;
12223
12224 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12225 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12226 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12227 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12228 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12229 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12230 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12231 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12232 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12233 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12234
12235 tree pchar_type_node = build_pointer_type (char_type_node);
12236 tree pcchar_type_node = build_pointer_type (
12237 build_type_variant (char_type_node, 1, 0));
12238 tree pfloat_type_node = build_pointer_type (float_type_node);
12239 tree pcfloat_type_node = build_pointer_type (
12240 build_type_variant (float_type_node, 1, 0));
12241 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12242 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12243 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12244
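/* Naming convention for the _ftype_ trees below (illustrative note):
   "v4si_ftype_v4sf_v4sf", for instance, is the type of a function
   taking two V4SF vectors and returning a V4SI vector -- roughly
   __m128i f (__m128, __m128) in terms of the intrinsics headers.  */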
12245 /* Comparisons. */
12246 tree int_ftype_v4sf_v4sf
12247 = build_function_type_list (integer_type_node,
12248 V4SF_type_node, V4SF_type_node, NULL_TREE);
12249 tree v4si_ftype_v4sf_v4sf
12250 = build_function_type_list (V4SI_type_node,
12251 V4SF_type_node, V4SF_type_node, NULL_TREE);
12252 /* MMX/SSE/integer conversions. */
12253 tree int_ftype_v4sf
12254 = build_function_type_list (integer_type_node,
12255 V4SF_type_node, NULL_TREE);
12256 tree int64_ftype_v4sf
12257 = build_function_type_list (long_long_integer_type_node,
12258 V4SF_type_node, NULL_TREE);
12259 tree int_ftype_v8qi
12260 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12261 tree v4sf_ftype_v4sf_int
12262 = build_function_type_list (V4SF_type_node,
12263 V4SF_type_node, integer_type_node, NULL_TREE);
12264 tree v4sf_ftype_v4sf_int64
12265 = build_function_type_list (V4SF_type_node,
12266 V4SF_type_node, long_long_integer_type_node,
12267 NULL_TREE);
12268 tree v4sf_ftype_v4sf_v2si
12269 = build_function_type_list (V4SF_type_node,
12270 V4SF_type_node, V2SI_type_node, NULL_TREE);
12271 tree int_ftype_v4hi_int
12272 = build_function_type_list (integer_type_node,
12273 V4HI_type_node, integer_type_node, NULL_TREE);
12274 tree v4hi_ftype_v4hi_int_int
12275 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12276 integer_type_node, integer_type_node,
12277 NULL_TREE);
12278 /* Miscellaneous. */
12279 tree v8qi_ftype_v4hi_v4hi
12280 = build_function_type_list (V8QI_type_node,
12281 V4HI_type_node, V4HI_type_node, NULL_TREE);
12282 tree v4hi_ftype_v2si_v2si
12283 = build_function_type_list (V4HI_type_node,
12284 V2SI_type_node, V2SI_type_node, NULL_TREE);
12285 tree v4sf_ftype_v4sf_v4sf_int
12286 = build_function_type_list (V4SF_type_node,
12287 V4SF_type_node, V4SF_type_node,
12288 integer_type_node, NULL_TREE);
12289 tree v2si_ftype_v4hi_v4hi
12290 = build_function_type_list (V2SI_type_node,
12291 V4HI_type_node, V4HI_type_node, NULL_TREE);
12292 tree v4hi_ftype_v4hi_int
12293 = build_function_type_list (V4HI_type_node,
12294 V4HI_type_node, integer_type_node, NULL_TREE);
12295 tree v4hi_ftype_v4hi_di
12296 = build_function_type_list (V4HI_type_node,
12297 V4HI_type_node, long_long_unsigned_type_node,
12298 NULL_TREE);
12299 tree v2si_ftype_v2si_di
12300 = build_function_type_list (V2SI_type_node,
12301 V2SI_type_node, long_long_unsigned_type_node,
12302 NULL_TREE);
12303 tree void_ftype_void
12304 = build_function_type (void_type_node, void_list_node);
12305 tree void_ftype_unsigned
12306 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12307 tree void_ftype_unsigned_unsigned
12308 = build_function_type_list (void_type_node, unsigned_type_node,
12309 unsigned_type_node, NULL_TREE);
12310 tree void_ftype_pcvoid_unsigned_unsigned
12311 = build_function_type_list (void_type_node, const_ptr_type_node,
12312 unsigned_type_node, unsigned_type_node,
12313 NULL_TREE);
12314 tree unsigned_ftype_void
12315 = build_function_type (unsigned_type_node, void_list_node);
12316 tree di_ftype_void
12317 = build_function_type (long_long_unsigned_type_node, void_list_node);
12318 tree v4sf_ftype_void
12319 = build_function_type (V4SF_type_node, void_list_node);
12320 tree v2si_ftype_v4sf
12321 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12322 /* Loads/stores. */
12323 tree void_ftype_v8qi_v8qi_pchar
12324 = build_function_type_list (void_type_node,
12325 V8QI_type_node, V8QI_type_node,
12326 pchar_type_node, NULL_TREE);
12327 tree v4sf_ftype_pcfloat
12328 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12329 /* @@@ the type is bogus */
12330 tree v4sf_ftype_v4sf_pv2si
12331 = build_function_type_list (V4SF_type_node,
12332 V4SF_type_node, pv2si_type_node, NULL_TREE);
12333 tree void_ftype_pv2si_v4sf
12334 = build_function_type_list (void_type_node,
12335 pv2si_type_node, V4SF_type_node, NULL_TREE);
12336 tree void_ftype_pfloat_v4sf
12337 = build_function_type_list (void_type_node,
12338 pfloat_type_node, V4SF_type_node, NULL_TREE);
12339 tree void_ftype_pdi_di
12340 = build_function_type_list (void_type_node,
12341 pdi_type_node, long_long_unsigned_type_node,
12342 NULL_TREE);
12343 tree void_ftype_pv2di_v2di
12344 = build_function_type_list (void_type_node,
12345 pv2di_type_node, V2DI_type_node, NULL_TREE);
12346 /* Normal vector unops. */
12347 tree v4sf_ftype_v4sf
12348 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12349
12350 /* Normal vector binops. */
12351 tree v4sf_ftype_v4sf_v4sf
12352 = build_function_type_list (V4SF_type_node,
12353 V4SF_type_node, V4SF_type_node, NULL_TREE);
12354 tree v8qi_ftype_v8qi_v8qi
12355 = build_function_type_list (V8QI_type_node,
12356 V8QI_type_node, V8QI_type_node, NULL_TREE);
12357 tree v4hi_ftype_v4hi_v4hi
12358 = build_function_type_list (V4HI_type_node,
12359 V4HI_type_node, V4HI_type_node, NULL_TREE);
12360 tree v2si_ftype_v2si_v2si
12361 = build_function_type_list (V2SI_type_node,
12362 V2SI_type_node, V2SI_type_node, NULL_TREE);
12363 tree di_ftype_di_di
12364 = build_function_type_list (long_long_unsigned_type_node,
12365 long_long_unsigned_type_node,
12366 long_long_unsigned_type_node, NULL_TREE);
12367
12368 tree v2si_ftype_v2sf
12369 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12370 tree v2sf_ftype_v2si
12371 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12372 tree v2si_ftype_v2si
12373 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12374 tree v2sf_ftype_v2sf
12375 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12376 tree v2sf_ftype_v2sf_v2sf
12377 = build_function_type_list (V2SF_type_node,
12378 V2SF_type_node, V2SF_type_node, NULL_TREE);
12379 tree v2si_ftype_v2sf_v2sf
12380 = build_function_type_list (V2SI_type_node,
12381 V2SF_type_node, V2SF_type_node, NULL_TREE);
12382 tree pint_type_node = build_pointer_type (integer_type_node);
12383 tree pcint_type_node = build_pointer_type (
12384 build_type_variant (integer_type_node, 1, 0));
12385 tree pdouble_type_node = build_pointer_type (double_type_node);
12386 tree pcdouble_type_node = build_pointer_type (
12387 build_type_variant (double_type_node, 1, 0));
12388 tree int_ftype_v2df_v2df
12389 = build_function_type_list (integer_type_node,
12390 V2DF_type_node, V2DF_type_node, NULL_TREE);
12391
12392 tree ti_ftype_void
12393 = build_function_type (intTI_type_node, void_list_node);
12394 tree v2di_ftype_void
12395 = build_function_type (V2DI_type_node, void_list_node);
12396 tree ti_ftype_ti_ti
12397 = build_function_type_list (intTI_type_node,
12398 intTI_type_node, intTI_type_node, NULL_TREE);
12399 tree void_ftype_pcvoid
12400 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12401 tree v2di_ftype_di
12402 = build_function_type_list (V2DI_type_node,
12403 long_long_unsigned_type_node, NULL_TREE);
12404 tree di_ftype_v2di
12405 = build_function_type_list (long_long_unsigned_type_node,
12406 V2DI_type_node, NULL_TREE);
12407 tree v4sf_ftype_v4si
12408 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12409 tree v4si_ftype_v4sf
12410 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12411 tree v2df_ftype_v4si
12412 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12413 tree v4si_ftype_v2df
12414 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12415 tree v2si_ftype_v2df
12416 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12417 tree v4sf_ftype_v2df
12418 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12419 tree v2df_ftype_v2si
12420 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12421 tree v2df_ftype_v4sf
12422 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12423 tree int_ftype_v2df
12424 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12425 tree int64_ftype_v2df
12426 = build_function_type_list (long_long_integer_type_node,
12427 V2DF_type_node, NULL_TREE);
12428 tree v2df_ftype_v2df_int
12429 = build_function_type_list (V2DF_type_node,
12430 V2DF_type_node, integer_type_node, NULL_TREE);
12431 tree v2df_ftype_v2df_int64
12432 = build_function_type_list (V2DF_type_node,
12433 V2DF_type_node, long_long_integer_type_node,
12434 NULL_TREE);
12435 tree v4sf_ftype_v4sf_v2df
12436 = build_function_type_list (V4SF_type_node,
12437 V4SF_type_node, V2DF_type_node, NULL_TREE);
12438 tree v2df_ftype_v2df_v4sf
12439 = build_function_type_list (V2DF_type_node,
12440 V2DF_type_node, V4SF_type_node, NULL_TREE);
12441 tree v2df_ftype_v2df_v2df_int
12442 = build_function_type_list (V2DF_type_node,
12443 V2DF_type_node, V2DF_type_node,
12444 integer_type_node,
12445 NULL_TREE);
12446 tree v2df_ftype_v2df_pv2si
12447 = build_function_type_list (V2DF_type_node,
12448 V2DF_type_node, pv2si_type_node, NULL_TREE);
12449 tree void_ftype_pv2si_v2df
12450 = build_function_type_list (void_type_node,
12451 pv2si_type_node, V2DF_type_node, NULL_TREE);
12452 tree void_ftype_pdouble_v2df
12453 = build_function_type_list (void_type_node,
12454 pdouble_type_node, V2DF_type_node, NULL_TREE);
12455 tree void_ftype_pint_int
12456 = build_function_type_list (void_type_node,
12457 pint_type_node, integer_type_node, NULL_TREE);
12458 tree void_ftype_v16qi_v16qi_pchar
12459 = build_function_type_list (void_type_node,
12460 V16QI_type_node, V16QI_type_node,
12461 pchar_type_node, NULL_TREE);
12462 tree v2df_ftype_pcdouble
12463 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12464 tree v2df_ftype_v2df_v2df
12465 = build_function_type_list (V2DF_type_node,
12466 V2DF_type_node, V2DF_type_node, NULL_TREE);
12467 tree v16qi_ftype_v16qi_v16qi
12468 = build_function_type_list (V16QI_type_node,
12469 V16QI_type_node, V16QI_type_node, NULL_TREE);
12470 tree v8hi_ftype_v8hi_v8hi
12471 = build_function_type_list (V8HI_type_node,
12472 V8HI_type_node, V8HI_type_node, NULL_TREE);
12473 tree v4si_ftype_v4si_v4si
12474 = build_function_type_list (V4SI_type_node,
12475 V4SI_type_node, V4SI_type_node, NULL_TREE);
12476 tree v2di_ftype_v2di_v2di
12477 = build_function_type_list (V2DI_type_node,
12478 V2DI_type_node, V2DI_type_node, NULL_TREE);
12479 tree v2di_ftype_v2df_v2df
12480 = build_function_type_list (V2DI_type_node,
12481 V2DF_type_node, V2DF_type_node, NULL_TREE);
12482 tree v2df_ftype_v2df
12483 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12484 tree v2df_ftype_double
12485 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12486 tree v2df_ftype_double_double
12487 = build_function_type_list (V2DF_type_node,
12488 double_type_node, double_type_node, NULL_TREE);
12489 tree int_ftype_v8hi_int
12490 = build_function_type_list (integer_type_node,
12491 V8HI_type_node, integer_type_node, NULL_TREE);
12492 tree v8hi_ftype_v8hi_int_int
12493 = build_function_type_list (V8HI_type_node,
12494 V8HI_type_node, integer_type_node,
12495 integer_type_node, NULL_TREE);
12496 tree v2di_ftype_v2di_int
12497 = build_function_type_list (V2DI_type_node,
12498 V2DI_type_node, integer_type_node, NULL_TREE);
12499 tree v4si_ftype_v4si_int
12500 = build_function_type_list (V4SI_type_node,
12501 V4SI_type_node, integer_type_node, NULL_TREE);
12502 tree v8hi_ftype_v8hi_int
12503 = build_function_type_list (V8HI_type_node,
12504 V8HI_type_node, integer_type_node, NULL_TREE);
12505 tree v8hi_ftype_v8hi_v2di
12506 = build_function_type_list (V8HI_type_node,
12507 V8HI_type_node, V2DI_type_node, NULL_TREE);
12508 tree v4si_ftype_v4si_v2di
12509 = build_function_type_list (V4SI_type_node,
12510 V4SI_type_node, V2DI_type_node, NULL_TREE);
12511 tree v4si_ftype_v8hi_v8hi
12512 = build_function_type_list (V4SI_type_node,
12513 V8HI_type_node, V8HI_type_node, NULL_TREE);
12514 tree di_ftype_v8qi_v8qi
12515 = build_function_type_list (long_long_unsigned_type_node,
12516 V8QI_type_node, V8QI_type_node, NULL_TREE);
12517 tree di_ftype_v2si_v2si
12518 = build_function_type_list (long_long_unsigned_type_node,
12519 V2SI_type_node, V2SI_type_node, NULL_TREE);
12520 tree v2di_ftype_v16qi_v16qi
12521 = build_function_type_list (V2DI_type_node,
12522 V16QI_type_node, V16QI_type_node, NULL_TREE);
12523 tree v2di_ftype_v4si_v4si
12524 = build_function_type_list (V2DI_type_node,
12525 V4SI_type_node, V4SI_type_node, NULL_TREE);
12526 tree int_ftype_v16qi
12527 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12528 tree v16qi_ftype_pcchar
12529 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12530 tree void_ftype_pchar_v16qi
12531 = build_function_type_list (void_type_node,
12532 pchar_type_node, V16QI_type_node, NULL_TREE);
12533 tree v4si_ftype_pcint
12534 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12535 tree void_ftype_pcint_v4si
12536 = build_function_type_list (void_type_node,
12537 pcint_type_node, V4SI_type_node, NULL_TREE);
12538 tree v2di_ftype_v2di
12539 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12540
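/* For illustration only, not part of the original file: the "_ftype_" nodes
   built above spell out builtin prototypes, return type first.  For example
   v4sf_ftype_v4sf_v4sf corresponds roughly to the C prototype

     __v4sf fn (__v4sf, __v4sf);

   where __v4sf is a 16-byte vector of floats, e.g.

     typedef float __v4sf __attribute__ ((vector_size (16)));

   Each node is handed to def_builtin below to give one or more
   __builtin_ia32_* functions their signature.  */
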
12541 tree float80_type;
12542 tree float128_type;
12543
12544 /* The __float80 type. */
12545 if (TYPE_MODE (long_double_type_node) == XFmode)
12546 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12547 "__float80");
12548 else
12549 {
12550 /* The __float80 type. */
12551 float80_type = make_node (REAL_TYPE);
12552 TYPE_PRECISION (float80_type) = 80;
12553 layout_type (float80_type);
12554 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12555 }
12556
12557 float128_type = make_node (REAL_TYPE);
12558 TYPE_PRECISION (float128_type) = 128;
12559 layout_type (float128_type);
12560 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12561
12562 /* Add all builtins that are more or less simple operations on two
12563 operands. */
12564 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12565 {
12566 /* Use one of the operands; the target can have a different mode for
12567 mask-generating compares. */
12568 enum machine_mode mode;
12569 tree type;
12570
12571 if (d->name == 0)
12572 continue;
12573 mode = insn_data[d->icode].operand[1].mode;
12574
12575 switch (mode)
12576 {
12577 case V16QImode:
12578 type = v16qi_ftype_v16qi_v16qi;
12579 break;
12580 case V8HImode:
12581 type = v8hi_ftype_v8hi_v8hi;
12582 break;
12583 case V4SImode:
12584 type = v4si_ftype_v4si_v4si;
12585 break;
12586 case V2DImode:
12587 type = v2di_ftype_v2di_v2di;
12588 break;
12589 case V2DFmode:
12590 type = v2df_ftype_v2df_v2df;
12591 break;
12592 case TImode:
12593 type = ti_ftype_ti_ti;
12594 break;
12595 case V4SFmode:
12596 type = v4sf_ftype_v4sf_v4sf;
12597 break;
12598 case V8QImode:
12599 type = v8qi_ftype_v8qi_v8qi;
12600 break;
12601 case V4HImode:
12602 type = v4hi_ftype_v4hi_v4hi;
12603 break;
12604 case V2SImode:
12605 type = v2si_ftype_v2si_v2si;
12606 break;
12607 case DImode:
12608 type = di_ftype_di_di;
12609 break;
12610
12611 default:
12612 abort ();
12613 }
12614
12615 /* Override for comparisons. */
12616 if (d->icode == CODE_FOR_maskcmpv4sf3
12617 || d->icode == CODE_FOR_maskncmpv4sf3
12618 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12619 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12620 type = v4si_ftype_v4sf_v4sf;
12621
12622 if (d->icode == CODE_FOR_maskcmpv2df3
12623 || d->icode == CODE_FOR_maskncmpv2df3
12624 || d->icode == CODE_FOR_vmmaskcmpv2df3
12625 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12626 type = v2di_ftype_v2df_v2df;
12627
12628 def_builtin (d->mask, d->name, type, d->code);
12629 }
12630
12631 /* Add the remaining MMX insns with somewhat more complicated types. */
12632 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12633 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12634 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12635 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12636 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12637
12638 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12639 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12640 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12641
12642 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12643 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12644
12645 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12646 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12647
12648 /* comi/ucomi insns. */
12649 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12650 if (d->mask == MASK_SSE2)
12651 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12652 else
12653 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12654
12655 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12656 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12657 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12658
12659 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12660 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12661 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12662 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12663 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12664 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12665 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12666 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12667 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12668 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12669 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12670
12671 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12672 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12673
12674 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12675
12676 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12677 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12678 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12679 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12680 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12681 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12682
12683 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12684 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12685 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12686 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12687
12688 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12689 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12690 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12691 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12692
12693 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12694
12695 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12696
12697 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12698 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12699 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12700 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12701 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12702 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12703
12704 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12705
12706 /* Original 3DNow! */
12707 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12708 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12709 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12710 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12711 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12712 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12713 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12714 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12715 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12716 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12717 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12718 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12719 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12720 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12721 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12722 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12723 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12724 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12725 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12726 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12727
12728 /* 3DNow! extension as used in the Athlon CPU. */
12729 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12730 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12731 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12732 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12733 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12734 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12735
12736 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12737
12738 /* SSE2 */
12739 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12740 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12741
12742 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12743 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12744 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12745
12746 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12747 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12748 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12749 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12750 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12751 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12752
12753 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12754 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12755 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12756 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12757
12758 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12759 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12760 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12761 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12762 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12763
12764 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12765 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12766 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12767 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12768
12769 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12770 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12771
12772 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12773
12774 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12775 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12776
12777 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12778 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12779 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12780 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12781 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12782
12783 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12784
12785 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12786 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12787 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12788 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12789
12790 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12791 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12792 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12793
12794 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12795 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
12796 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12797 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12798
12799 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12800 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12801 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12802 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12803 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
12804 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12805 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12806
12807 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
12808 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12809 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12810
12811 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12812 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12813 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
12814 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12815 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12816 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
12817 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12818
12819 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12820
12821 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12822 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12823
12824 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12825 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12826 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12827
12828 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12829 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12830 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12831
12832 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12833 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12834
12835 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12836 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12837 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12838 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12839
12840 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12841 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12842 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12843 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12844
12845 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12846 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12847
12848 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12849
12850 /* Prescott New Instructions. */
12851 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
12852 void_ftype_pcvoid_unsigned_unsigned,
12853 IX86_BUILTIN_MONITOR);
12854 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
12855 void_ftype_unsigned_unsigned,
12856 IX86_BUILTIN_MWAIT);
12857 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
12858 v4sf_ftype_v4sf,
12859 IX86_BUILTIN_MOVSHDUP);
12860 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
12861 v4sf_ftype_v4sf,
12862 IX86_BUILTIN_MOVSLDUP);
12863 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
12864 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
12865 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
12866 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
12867 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
12868 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
12869 }
12870
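/* Illustrative sketch, not in the original source: once registered by
   def_builtin above, a builtin is callable directly from C, which is how the
   <mmintrin.h>/<xmmintrin.h>/<emmintrin.h> intrinsics are implemented.  For
   example, code along the lines of

     typedef float __v4sf __attribute__ ((vector_size (16)));

     __v4sf
     my_sqrt (__v4sf x)
     {
       return __builtin_ia32_sqrtps (x);
     }

   reaches ix86_expand_builtin below, which dispatches on the function code
   (here IX86_BUILTIN_SQRTPS) and emits the corresponding insn pattern.  */
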
12871 /* Errors in the source file can cause expand_expr to return const0_rtx
12872 where we expect a vector. To avoid crashing, use one of the vector
12873 clear instructions. */
12874 static rtx
12875 safe_vector_operand (rtx x, enum machine_mode mode)
12876 {
12877 if (x != const0_rtx)
12878 return x;
12879 x = gen_reg_rtx (mode);
12880
12881 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12882 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12883 : gen_rtx_SUBREG (DImode, x, 0)));
12884 else
12885 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12886 : gen_rtx_SUBREG (V4SFmode, x, 0),
12887 CONST0_RTX (V4SFmode)));
12888 return x;
12889 }
12890
12891 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12892
12893 static rtx
12894 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
12895 {
12896 rtx pat;
12897 tree arg0 = TREE_VALUE (arglist);
12898 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12899 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12900 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12901 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12902 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12903 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12904
12905 if (VECTOR_MODE_P (mode0))
12906 op0 = safe_vector_operand (op0, mode0);
12907 if (VECTOR_MODE_P (mode1))
12908 op1 = safe_vector_operand (op1, mode1);
12909
12910 if (! target
12911 || GET_MODE (target) != tmode
12912 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12913 target = gen_reg_rtx (tmode);
12914
12915 if (GET_MODE (op1) == SImode && mode1 == TImode)
12916 {
12917 rtx x = gen_reg_rtx (V4SImode);
12918 emit_insn (gen_sse2_loadd (x, op1));
12919 op1 = gen_lowpart (TImode, x);
12920 }
12921
12922 /* In case the insn wants input operands in modes different from
12923 the result, abort. */
12924 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
12925 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
12926 abort ();
12927
12928 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12929 op0 = copy_to_mode_reg (mode0, op0);
12930 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12931 op1 = copy_to_mode_reg (mode1, op1);
12932
12933 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12934 yet one of the two must not be a memory. This is normally enforced
12935 by expanders, but we didn't bother to create one here. */
12936 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12937 op0 = copy_to_mode_reg (mode0, op0);
12938
12939 pat = GEN_FCN (icode) (target, op0, op1);
12940 if (! pat)
12941 return 0;
12942 emit_insn (pat);
12943 return target;
12944 }
12945
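/* Usage sketch, added as commentary only: most two-operand intrinsics funnel
   through the routine above via the bdesc_2arg table.  For instance
   _mm_add_ps is written roughly as

     static __inline __m128
     _mm_add_ps (__m128 __A, __m128 __B)
     {
       return (__m128) __builtin_ia32_addps ((__v4sf) __A, (__v4sf) __B);
     }

   and the builtin call is expanded by ix86_expand_binop_builtin into a
   single insn of the pattern selected by its icode (addv4sf3 here).  */
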
12946 /* Subroutine of ix86_expand_builtin to take care of stores. */
12947
12948 static rtx
12949 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
12950 {
12951 rtx pat;
12952 tree arg0 = TREE_VALUE (arglist);
12953 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12954 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12955 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12956 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12957 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12958
12959 if (VECTOR_MODE_P (mode1))
12960 op1 = safe_vector_operand (op1, mode1);
12961
12962 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12963 op1 = copy_to_mode_reg (mode1, op1);
12964
12965 pat = GEN_FCN (icode) (op0, op1);
12966 if (pat)
12967 emit_insn (pat);
12968 return 0;
12969 }
12970
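/* Commentary added for illustration: store builtins produce no value, so the
   routine above returns 0 after emitting the move; the pointer argument
   becomes the MEM operand.  For example _mm_storeu_ps is roughly

     static __inline void
     _mm_storeu_ps (float *__P, __m128 __A)
     {
       __builtin_ia32_storeups (__P, (__v4sf) __A);
     }

   and ends up here through the IX86_BUILTIN_STOREUPS case.  */
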
12971 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12972
12973 static rtx
12974 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
12975 rtx target, int do_load)
12976 {
12977 rtx pat;
12978 tree arg0 = TREE_VALUE (arglist);
12979 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12980 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12981 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12982
12983 if (! target
12984 || GET_MODE (target) != tmode
12985 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12986 target = gen_reg_rtx (tmode);
12987 if (do_load)
12988 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12989 else
12990 {
12991 if (VECTOR_MODE_P (mode0))
12992 op0 = safe_vector_operand (op0, mode0);
12993
12994 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12995 op0 = copy_to_mode_reg (mode0, op0);
12996 }
12997
12998 pat = GEN_FCN (icode) (target, op0);
12999 if (! pat)
13000 return 0;
13001 emit_insn (pat);
13002 return target;
13003 }
13004
13005 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13006 sqrtss, rsqrtss, rcpss. */
13007
13008 static rtx
13009 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13010 {
13011 rtx pat;
13012 tree arg0 = TREE_VALUE (arglist);
13013 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13014 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13015 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13016
13017 if (! target
13018 || GET_MODE (target) != tmode
13019 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13020 target = gen_reg_rtx (tmode);
13021
13022 if (VECTOR_MODE_P (mode0))
13023 op0 = safe_vector_operand (op0, mode0);
13024
13025 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13026 op0 = copy_to_mode_reg (mode0, op0);
13027
13028 op1 = op0;
13029 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13030 op1 = copy_to_mode_reg (mode0, op1);
13031
13032 pat = GEN_FCN (icode) (target, op0, op1);
13033 if (! pat)
13034 return 0;
13035 emit_insn (pat);
13036 return target;
13037 }
13038
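/* Added note, not in the original: the vm* scalar patterns used here take
   two vector operands because the instruction only replaces element 0 and
   keeps the remaining elements of the other operand.  Since op0 is passed
   twice above, a call such as

     __v4sf y = __builtin_ia32_sqrtss (x);

   gives sqrt (x[0]) in element 0 with x[1..3] passed through, matching the
   behavior of _mm_sqrt_ss.  */
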
13039 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13040
13041 static rtx
13042 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13043 rtx target)
13044 {
13045 rtx pat;
13046 tree arg0 = TREE_VALUE (arglist);
13047 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13048 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13049 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13050 rtx op2;
13051 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13052 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13053 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13054 enum rtx_code comparison = d->comparison;
13055
13056 if (VECTOR_MODE_P (mode0))
13057 op0 = safe_vector_operand (op0, mode0);
13058 if (VECTOR_MODE_P (mode1))
13059 op1 = safe_vector_operand (op1, mode1);
13060
13061 /* Swap operands if we have a comparison that isn't available in
13062 hardware. */
13063 if (d->flag)
13064 {
13065 rtx tmp = gen_reg_rtx (mode1);
13066 emit_move_insn (tmp, op1);
13067 op1 = op0;
13068 op0 = tmp;
13069 }
13070
13071 if (! target
13072 || GET_MODE (target) != tmode
13073 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13074 target = gen_reg_rtx (tmode);
13075
13076 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13077 op0 = copy_to_mode_reg (mode0, op0);
13078 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13079 op1 = copy_to_mode_reg (mode1, op1);
13080
13081 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13082 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13083 if (! pat)
13084 return 0;
13085 emit_insn (pat);
13086 return target;
13087 }
13088
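/* Added note, not in the original: d->flag marks compare builtins whose
   condition has no direct SSE encoding.  The hardware only provides the
   "less than" family, so e.g. __builtin_ia32_cmpgtps (a, b) is handled by
   swapping the operands above and emitting the LT compare, roughly
   cmpltps (b, a).  */
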
13089 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13090
13091 static rtx
13092 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13093 rtx target)
13094 {
13095 rtx pat;
13096 tree arg0 = TREE_VALUE (arglist);
13097 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13098 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13099 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13100 rtx op2;
13101 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13102 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13103 enum rtx_code comparison = d->comparison;
13104
13105 if (VECTOR_MODE_P (mode0))
13106 op0 = safe_vector_operand (op0, mode0);
13107 if (VECTOR_MODE_P (mode1))
13108 op1 = safe_vector_operand (op1, mode1);
13109
13110 /* Swap operands if we have a comparison that isn't available in
13111 hardware. */
13112 if (d->flag)
13113 {
13114 rtx tmp = op1;
13115 op1 = op0;
13116 op0 = tmp;
13117 }
13118
13119 target = gen_reg_rtx (SImode);
13120 emit_move_insn (target, const0_rtx);
13121 target = gen_rtx_SUBREG (QImode, target, 0);
13122
13123 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13124 op0 = copy_to_mode_reg (mode0, op0);
13125 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13126 op1 = copy_to_mode_reg (mode1, op1);
13127
13128 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13129 pat = GEN_FCN (d->icode) (op0, op1);
13130 if (! pat)
13131 return 0;
13132 emit_insn (pat);
13133 emit_insn (gen_rtx_SET (VOIDmode,
13134 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13135 gen_rtx_fmt_ee (comparison, QImode,
13136 SET_DEST (pat),
13137 const0_rtx)));
13138
13139 return SUBREG_REG (target);
13140 }
13141
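/* Added note for illustration: the comi/ucomi builtins return an int, e.g.
   _mm_comieq_ss is roughly

     static __inline int
     _mm_comieq_ss (__m128 __A, __m128 __B)
     {
       return __builtin_ia32_comieq ((__v4sf) __A, (__v4sf) __B);
     }

   The compare insn emitted above sets the flags; the following SET with
   STRICT_LOW_PART then materializes the condition into the low byte of an
   SImode register that was first cleared to zero, and that register is
   returned as the result.  */
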
13142 /* Expand an expression EXP that calls a built-in function,
13143 with result going to TARGET if that's convenient
13144 (and in mode MODE if that's convenient).
13145 SUBTARGET may be used as the target for computing one of EXP's operands.
13146 IGNORE is nonzero if the value is to be ignored. */
13147
13148 rtx
13149 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13150 enum machine_mode mode ATTRIBUTE_UNUSED,
13151 int ignore ATTRIBUTE_UNUSED)
13152 {
13153 const struct builtin_description *d;
13154 size_t i;
13155 enum insn_code icode;
13156 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13157 tree arglist = TREE_OPERAND (exp, 1);
13158 tree arg0, arg1, arg2;
13159 rtx op0, op1, op2, pat;
13160 enum machine_mode tmode, mode0, mode1, mode2;
13161 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13162
13163 switch (fcode)
13164 {
13165 case IX86_BUILTIN_EMMS:
13166 emit_insn (gen_emms ());
13167 return 0;
13168
13169 case IX86_BUILTIN_SFENCE:
13170 emit_insn (gen_sfence ());
13171 return 0;
13172
13173 case IX86_BUILTIN_PEXTRW:
13174 case IX86_BUILTIN_PEXTRW128:
13175 icode = (fcode == IX86_BUILTIN_PEXTRW
13176 ? CODE_FOR_mmx_pextrw
13177 : CODE_FOR_sse2_pextrw);
13178 arg0 = TREE_VALUE (arglist);
13179 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13180 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13181 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13182 tmode = insn_data[icode].operand[0].mode;
13183 mode0 = insn_data[icode].operand[1].mode;
13184 mode1 = insn_data[icode].operand[2].mode;
13185
13186 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13187 op0 = copy_to_mode_reg (mode0, op0);
13188 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13189 {
13190 error ("selector must be an integer constant in the range 0..%i",
13191 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13192 return gen_reg_rtx (tmode);
13193 }
13194 if (target == 0
13195 || GET_MODE (target) != tmode
13196 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13197 target = gen_reg_rtx (tmode);
13198 pat = GEN_FCN (icode) (target, op0, op1);
13199 if (! pat)
13200 return 0;
13201 emit_insn (pat);
13202 return target;
13203
13204 case IX86_BUILTIN_PINSRW:
13205 case IX86_BUILTIN_PINSRW128:
13206 icode = (fcode == IX86_BUILTIN_PINSRW
13207 ? CODE_FOR_mmx_pinsrw
13208 : CODE_FOR_sse2_pinsrw);
13209 arg0 = TREE_VALUE (arglist);
13210 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13211 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13212 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13213 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13214 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13215 tmode = insn_data[icode].operand[0].mode;
13216 mode0 = insn_data[icode].operand[1].mode;
13217 mode1 = insn_data[icode].operand[2].mode;
13218 mode2 = insn_data[icode].operand[3].mode;
13219
13220 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13221 op0 = copy_to_mode_reg (mode0, op0);
13222 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13223 op1 = copy_to_mode_reg (mode1, op1);
13224 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13225 {
13226 error ("selector must be an integer constant in the range 0..%i",
13227 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13228 return const0_rtx;
13229 }
13230 if (target == 0
13231 || GET_MODE (target) != tmode
13232 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13233 target = gen_reg_rtx (tmode);
13234 pat = GEN_FCN (icode) (target, op0, op1, op2);
13235 if (! pat)
13236 return 0;
13237 emit_insn (pat);
13238 return target;
13239
13240 case IX86_BUILTIN_MASKMOVQ:
13241 case IX86_BUILTIN_MASKMOVDQU:
13242 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13243 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13244 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13245 : CODE_FOR_sse2_maskmovdqu));
13246 /* Note the arg order is different from the operand order. */
13247 arg1 = TREE_VALUE (arglist);
13248 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13249 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13250 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13251 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13252 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13253 mode0 = insn_data[icode].operand[0].mode;
13254 mode1 = insn_data[icode].operand[1].mode;
13255 mode2 = insn_data[icode].operand[2].mode;
13256
13257 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13258 op0 = copy_to_mode_reg (mode0, op0);
13259 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13260 op1 = copy_to_mode_reg (mode1, op1);
13261 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13262 op2 = copy_to_mode_reg (mode2, op2);
13263 pat = GEN_FCN (icode) (op0, op1, op2);
13264 if (! pat)
13265 return 0;
13266 emit_insn (pat);
13267 return 0;
13268
13269 case IX86_BUILTIN_SQRTSS:
13270 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13271 case IX86_BUILTIN_RSQRTSS:
13272 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13273 case IX86_BUILTIN_RCPSS:
13274 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13275
13276 case IX86_BUILTIN_LOADAPS:
13277 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13278
13279 case IX86_BUILTIN_LOADUPS:
13280 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13281
13282 case IX86_BUILTIN_STOREAPS:
13283 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13284
13285 case IX86_BUILTIN_STOREUPS:
13286 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13287
13288 case IX86_BUILTIN_LOADSS:
13289 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13290
13291 case IX86_BUILTIN_STORESS:
13292 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13293
13294 case IX86_BUILTIN_LOADHPS:
13295 case IX86_BUILTIN_LOADLPS:
13296 case IX86_BUILTIN_LOADHPD:
13297 case IX86_BUILTIN_LOADLPD:
13298 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13299 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13300 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13301 : CODE_FOR_sse2_movsd);
13302 arg0 = TREE_VALUE (arglist);
13303 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13304 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13305 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13306 tmode = insn_data[icode].operand[0].mode;
13307 mode0 = insn_data[icode].operand[1].mode;
13308 mode1 = insn_data[icode].operand[2].mode;
13309
13310 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13311 op0 = copy_to_mode_reg (mode0, op0);
13312 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13313 if (target == 0
13314 || GET_MODE (target) != tmode
13315 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13316 target = gen_reg_rtx (tmode);
13317 pat = GEN_FCN (icode) (target, op0, op1);
13318 if (! pat)
13319 return 0;
13320 emit_insn (pat);
13321 return target;
13322
13323 case IX86_BUILTIN_STOREHPS:
13324 case IX86_BUILTIN_STORELPS:
13325 case IX86_BUILTIN_STOREHPD:
13326 case IX86_BUILTIN_STORELPD:
13327 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13328 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13329 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13330 : CODE_FOR_sse2_movsd);
13331 arg0 = TREE_VALUE (arglist);
13332 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13333 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13334 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13335 mode0 = insn_data[icode].operand[1].mode;
13336 mode1 = insn_data[icode].operand[2].mode;
13337
13338 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13339 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13340 op1 = copy_to_mode_reg (mode1, op1);
13341
13342 pat = GEN_FCN (icode) (op0, op0, op1);
13343 if (! pat)
13344 return 0;
13345 emit_insn (pat);
13346 return 0;
13347
13348 case IX86_BUILTIN_MOVNTPS:
13349 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13350 case IX86_BUILTIN_MOVNTQ:
13351 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13352
13353 case IX86_BUILTIN_LDMXCSR:
13354 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13355 target = assign_386_stack_local (SImode, 0);
13356 emit_move_insn (target, op0);
13357 emit_insn (gen_ldmxcsr (target));
13358 return 0;
13359
13360 case IX86_BUILTIN_STMXCSR:
13361 target = assign_386_stack_local (SImode, 0);
13362 emit_insn (gen_stmxcsr (target));
13363 return copy_to_mode_reg (SImode, target);
13364
13365 case IX86_BUILTIN_SHUFPS:
13366 case IX86_BUILTIN_SHUFPD:
13367 icode = (fcode == IX86_BUILTIN_SHUFPS
13368 ? CODE_FOR_sse_shufps
13369 : CODE_FOR_sse2_shufpd);
13370 arg0 = TREE_VALUE (arglist);
13371 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13372 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13373 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13374 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13375 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13376 tmode = insn_data[icode].operand[0].mode;
13377 mode0 = insn_data[icode].operand[1].mode;
13378 mode1 = insn_data[icode].operand[2].mode;
13379 mode2 = insn_data[icode].operand[3].mode;
13380
13381 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13382 op0 = copy_to_mode_reg (mode0, op0);
13383 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13384 op1 = copy_to_mode_reg (mode1, op1);
13385 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13386 {
13387 /* @@@ better error message */
13388 error ("mask must be an immediate");
13389 return gen_reg_rtx (tmode);
13390 }
13391 if (target == 0
13392 || GET_MODE (target) != tmode
13393 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13394 target = gen_reg_rtx (tmode);
13395 pat = GEN_FCN (icode) (target, op0, op1, op2);
13396 if (! pat)
13397 return 0;
13398 emit_insn (pat);
13399 return target;
13400
13401 case IX86_BUILTIN_PSHUFW:
13402 case IX86_BUILTIN_PSHUFD:
13403 case IX86_BUILTIN_PSHUFHW:
13404 case IX86_BUILTIN_PSHUFLW:
13405 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13406 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13407 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13408 : CODE_FOR_mmx_pshufw);
13409 arg0 = TREE_VALUE (arglist);
13410 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13411 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13412 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13413 tmode = insn_data[icode].operand[0].mode;
13414 mode1 = insn_data[icode].operand[1].mode;
13415 mode2 = insn_data[icode].operand[2].mode;
13416
13417 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13418 op0 = copy_to_mode_reg (mode1, op0);
13419 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13420 {
13421 /* @@@ better error message */
13422 error ("mask must be an immediate");
13423 return const0_rtx;
13424 }
13425 if (target == 0
13426 || GET_MODE (target) != tmode
13427 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13428 target = gen_reg_rtx (tmode);
13429 pat = GEN_FCN (icode) (target, op0, op1);
13430 if (! pat)
13431 return 0;
13432 emit_insn (pat);
13433 return target;
13434
13435 case IX86_BUILTIN_PSLLDQI128:
13436 case IX86_BUILTIN_PSRLDQI128:
13437 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13438 : CODE_FOR_sse2_lshrti3);
13439 arg0 = TREE_VALUE (arglist);
13440 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13441 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13442 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13443 tmode = insn_data[icode].operand[0].mode;
13444 mode1 = insn_data[icode].operand[1].mode;
13445 mode2 = insn_data[icode].operand[2].mode;
13446
13447 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13448 {
13449 op0 = copy_to_reg (op0);
13450 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13451 }
13452 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13453 {
13454 error ("shift must be an immediate");
13455 return const0_rtx;
13456 }
13457 target = gen_reg_rtx (V2DImode);
13458 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13459 if (! pat)
13460 return 0;
13461 emit_insn (pat);
13462 return target;
13463
13464 case IX86_BUILTIN_FEMMS:
13465 emit_insn (gen_femms ());
13466 return NULL_RTX;
13467
13468 case IX86_BUILTIN_PAVGUSB:
13469 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13470
13471 case IX86_BUILTIN_PF2ID:
13472 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13473
13474 case IX86_BUILTIN_PFACC:
13475 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13476
13477 case IX86_BUILTIN_PFADD:
13478 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13479
13480 case IX86_BUILTIN_PFCMPEQ:
13481 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13482
13483 case IX86_BUILTIN_PFCMPGE:
13484 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13485
13486 case IX86_BUILTIN_PFCMPGT:
13487 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13488
13489 case IX86_BUILTIN_PFMAX:
13490 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13491
13492 case IX86_BUILTIN_PFMIN:
13493 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13494
13495 case IX86_BUILTIN_PFMUL:
13496 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13497
13498 case IX86_BUILTIN_PFRCP:
13499 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13500
13501 case IX86_BUILTIN_PFRCPIT1:
13502 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13503
13504 case IX86_BUILTIN_PFRCPIT2:
13505 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13506
13507 case IX86_BUILTIN_PFRSQIT1:
13508 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13509
13510 case IX86_BUILTIN_PFRSQRT:
13511 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13512
13513 case IX86_BUILTIN_PFSUB:
13514 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13515
13516 case IX86_BUILTIN_PFSUBR:
13517 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13518
13519 case IX86_BUILTIN_PI2FD:
13520 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13521
13522 case IX86_BUILTIN_PMULHRW:
13523 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13524
13525 case IX86_BUILTIN_PF2IW:
13526 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13527
13528 case IX86_BUILTIN_PFNACC:
13529 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13530
13531 case IX86_BUILTIN_PFPNACC:
13532 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13533
13534 case IX86_BUILTIN_PI2FW:
13535 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13536
13537 case IX86_BUILTIN_PSWAPDSI:
13538 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13539
13540 case IX86_BUILTIN_PSWAPDSF:
13541 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13542
13543 case IX86_BUILTIN_SSE_ZERO:
13544 target = gen_reg_rtx (V4SFmode);
13545 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13546 return target;
13547
13548 case IX86_BUILTIN_MMX_ZERO:
13549 target = gen_reg_rtx (DImode);
13550 emit_insn (gen_mmx_clrdi (target));
13551 return target;
13552
13553 case IX86_BUILTIN_CLRTI:
13554 target = gen_reg_rtx (V2DImode);
13555 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13556 return target;
13557
13558
13559 case IX86_BUILTIN_SQRTSD:
13560 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13561 case IX86_BUILTIN_LOADAPD:
13562 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13563 case IX86_BUILTIN_LOADUPD:
13564 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13565
13566 case IX86_BUILTIN_STOREAPD:
13567 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13568 case IX86_BUILTIN_STOREUPD:
13569 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13570
13571 case IX86_BUILTIN_LOADSD:
13572 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13573
13574 case IX86_BUILTIN_STORESD:
13575 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13576
13577 case IX86_BUILTIN_SETPD1:
13578 target = assign_386_stack_local (DFmode, 0);
13579 arg0 = TREE_VALUE (arglist);
13580 emit_move_insn (adjust_address (target, DFmode, 0),
13581 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13582 op0 = gen_reg_rtx (V2DFmode);
13583 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13584 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13585 return op0;
13586
13587 case IX86_BUILTIN_SETPD:
13588 target = assign_386_stack_local (V2DFmode, 0);
13589 arg0 = TREE_VALUE (arglist);
13590 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13591 emit_move_insn (adjust_address (target, DFmode, 0),
13592 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13593 emit_move_insn (adjust_address (target, DFmode, 8),
13594 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13595 op0 = gen_reg_rtx (V2DFmode);
13596 emit_insn (gen_sse2_movapd (op0, target));
13597 return op0;
13598
13599 case IX86_BUILTIN_LOADRPD:
13600 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13601 gen_reg_rtx (V2DFmode), 1);
13602 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13603 return target;
13604
13605 case IX86_BUILTIN_LOADPD1:
13606 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13607 gen_reg_rtx (V2DFmode), 1);
13608 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13609 return target;
13610
13611 case IX86_BUILTIN_STOREPD1:
13612 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13613 case IX86_BUILTIN_STORERPD:
13614 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13615
13616 case IX86_BUILTIN_CLRPD:
13617 target = gen_reg_rtx (V2DFmode);
13618 emit_insn (gen_sse_clrv2df (target));
13619 return target;
13620
13621 case IX86_BUILTIN_MFENCE:
13622 emit_insn (gen_sse2_mfence ());
13623 return 0;
13624 case IX86_BUILTIN_LFENCE:
13625 emit_insn (gen_sse2_lfence ());
13626 return 0;
13627
13628 case IX86_BUILTIN_CLFLUSH:
13629 arg0 = TREE_VALUE (arglist);
13630 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13631 icode = CODE_FOR_sse2_clflush;
13632 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13633 op0 = copy_to_mode_reg (Pmode, op0);
13634
13635 emit_insn (gen_sse2_clflush (op0));
13636 return 0;
13637
13638 case IX86_BUILTIN_MOVNTPD:
13639 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13640 case IX86_BUILTIN_MOVNTDQ:
13641 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13642 case IX86_BUILTIN_MOVNTI:
13643 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13644
13645 case IX86_BUILTIN_LOADDQA:
13646 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13647 case IX86_BUILTIN_LOADDQU:
13648 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13649 case IX86_BUILTIN_LOADD:
13650 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13651
13652 case IX86_BUILTIN_STOREDQA:
13653 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13654 case IX86_BUILTIN_STOREDQU:
13655 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13656 case IX86_BUILTIN_STORED:
13657 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13658
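/* MONITOR and MWAIT want their operands in registers; expand each argument
   and copy it into a register when the expander did not already do so.  */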
13659 case IX86_BUILTIN_MONITOR:
13660 arg0 = TREE_VALUE (arglist);
13661 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13662 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13663 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13664 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13665 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13666 if (!REG_P (op0))
13667 op0 = copy_to_mode_reg (SImode, op0);
13668 if (!REG_P (op1))
13669 op1 = copy_to_mode_reg (SImode, op1);
13670 if (!REG_P (op2))
13671 op2 = copy_to_mode_reg (SImode, op2);
13672 emit_insn (gen_monitor (op0, op1, op2));
13673 return 0;
13674
13675 case IX86_BUILTIN_MWAIT:
13676 arg0 = TREE_VALUE (arglist);
13677 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13678 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13679 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13680 if (!REG_P (op0))
13681 op0 = copy_to_mode_reg (SImode, op0);
13682 if (!REG_P (op1))
13683 op1 = copy_to_mode_reg (SImode, op1);
13684 emit_insn (gen_mwait (op0, op1));
13685 return 0;
13686
13687 case IX86_BUILTIN_LOADDDUP:
13688 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13689
13690 case IX86_BUILTIN_LDDQU:
13691 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13692 1);
13693
13694 default:
13695 break;
13696 }
13697
13698 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13699 if (d->code == fcode)
13700 {
13701 /* Compares are treated specially. */
13702 if (d->icode == CODE_FOR_maskcmpv4sf3
13703 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13704 || d->icode == CODE_FOR_maskncmpv4sf3
13705 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13706 || d->icode == CODE_FOR_maskcmpv2df3
13707 || d->icode == CODE_FOR_vmmaskcmpv2df3
13708 || d->icode == CODE_FOR_maskncmpv2df3
13709 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13710 return ix86_expand_sse_compare (d, arglist, target);
13711
13712 return ix86_expand_binop_builtin (d->icode, arglist, target);
13713 }
13714
13715 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13716 if (d->code == fcode)
13717 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13718
13719 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13720 if (d->code == fcode)
13721 return ix86_expand_sse_comi (d, arglist, target);
13722
13723 /* @@@ Should really do something sensible here. */
13724 return 0;
13725 }
13726
13727 /* Store OPERAND to memory after reload is completed.  This means
13728 that we can't easily use assign_stack_local.  */
13729 rtx
13730 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13731 {
13732 rtx result;
13733 if (!reload_completed)
13734 abort ();
13735 if (TARGET_RED_ZONE)
13736 {
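/* On x86-64 the ABI-defined red zone below the stack pointer is free for
   use without adjusting the stack pointer, so simply store into it.  */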
13737 result = gen_rtx_MEM (mode,
13738 gen_rtx_PLUS (Pmode,
13739 stack_pointer_rtx,
13740 GEN_INT (-RED_ZONE_SIZE)));
13741 emit_move_insn (result, operand);
13742 }
13743 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13744 {
13745 switch (mode)
13746 {
13747 case HImode:
13748 case SImode:
13749 operand = gen_lowpart (DImode, operand);
13750 /* FALLTHRU */
13751 case DImode:
13752 emit_insn (
13753 gen_rtx_SET (VOIDmode,
13754 gen_rtx_MEM (DImode,
13755 gen_rtx_PRE_DEC (DImode,
13756 stack_pointer_rtx)),
13757 operand));
13758 break;
13759 default:
13760 abort ();
13761 }
13762 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13763 }
13764 else
13765 {
13766 switch (mode)
13767 {
13768 case DImode:
13769 {
13770 rtx operands[2];
13771 split_di (&operand, 1, operands, operands + 1);
13772 emit_insn (
13773 gen_rtx_SET (VOIDmode,
13774 gen_rtx_MEM (SImode,
13775 gen_rtx_PRE_DEC (Pmode,
13776 stack_pointer_rtx)),
13777 operands[1]));
13778 emit_insn (
13779 gen_rtx_SET (VOIDmode,
13780 gen_rtx_MEM (SImode,
13781 gen_rtx_PRE_DEC (Pmode,
13782 stack_pointer_rtx)),
13783 operands[0]));
13784 }
13785 break;
13786 case HImode:
13787 /* It is better to store HImodes as SImodes. */
13788 if (!TARGET_PARTIAL_REG_STALL)
13789 operand = gen_lowpart (SImode, operand);
13790 /* FALLTHRU */
13791 case SImode:
13792 emit_insn (
13793 gen_rtx_SET (VOIDmode,
13794 gen_rtx_MEM (GET_MODE (operand),
13795 gen_rtx_PRE_DEC (SImode,
13796 stack_pointer_rtx)),
13797 operand));
13798 break;
13799 default:
13800 abort ();
13801 }
13802 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13803 }
13804 return result;
13805 }
13806
13807 /* Free the operand from memory.  */
13808 void
13809 ix86_free_from_memory (enum machine_mode mode)
13810 {
13811 if (!TARGET_RED_ZONE)
13812 {
13813 int size;
13814
13815 if (mode == DImode || TARGET_64BIT)
13816 size = 8;
13817 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13818 size = 2;
13819 else
13820 size = 4;
13821 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
13822 to a pop or add instruction if registers are available.  */
13823 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13824 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13825 GEN_INT (size))));
13826 }
13827 }
13828
13829 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13830 QImode must go into class Q_REGS.
13831 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13832 movdf to do mem-to-mem moves through integer regs. */
13833 enum reg_class
13834 ix86_preferred_reload_class (rtx x, enum reg_class class)
13835 {
13836 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13837 return NO_REGS;
13838 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13839 {
13840 /* SSE can't load any constant directly yet. */
13841 if (SSE_CLASS_P (class))
13842 return NO_REGS;
13843 /* Floats can load 0 and 1. */
13844 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13845 {
13846 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13847 if (MAYBE_SSE_CLASS_P (class))
13848 return (reg_class_subset_p (class, GENERAL_REGS)
13849 ? GENERAL_REGS : FLOAT_REGS);
13850 else
13851 return class;
13852 }
13853 /* General regs can load everything. */
13854 if (reg_class_subset_p (class, GENERAL_REGS))
13855 return GENERAL_REGS;
13856 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13857 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13858 return NO_REGS;
13859 }
13860 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13861 return NO_REGS;
13862 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13863 return Q_REGS;
13864 return class;
13865 }
13866
13867 /* If we are copying between general and FP registers, we need a memory
13868 location. The same is true for SSE and MMX registers.
13869
13870 The macro can't work reliably when one of the CLASSES is a class containing
13871 registers from multiple units (SSE, MMX, integer).  We avoid this by never
13872 combining those units in a single alternative in the machine description.
13873 Ensure that this constraint holds to avoid unexpected surprises.
13874
13875 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13876 enforce these sanity checks. */
13877 int
13878 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
13879 enum machine_mode mode, int strict)
13880 {
13881 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13882 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13883 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13884 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13885 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13886 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13887 {
13888 if (strict)
13889 abort ();
13890 else
13891 return 1;
13892 }
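/* A memory temporary is always needed between x87 and non-x87 registers.
   Between SSE/MMX and integer registers it is needed unless the value can
   move as a single SImode (or DImode on 64-bit) integer move and inter-unit
   moves are enabled or we are optimizing for size.  */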
13893 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13894 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13895 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
13896 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
13897 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
13898 }
13899 /* Return the cost of moving data from a register in class CLASS1 to
13900 one in class CLASS2.
13901
13902 It is not required that the cost always equal 2 when FROM is the same as TO;
13903 on some machines it is expensive to move between registers if they are not
13904 general registers. */
13905 int
13906 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
13907 enum reg_class class2)
13908 {
13909 /* In case we require secondary memory, compute the cost of the store followed
13910 by the load.  In order to avoid bad register allocation choices, we need
13911 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
13912
13913 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13914 {
13915 int cost = 1;
13916
13917 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13918 MEMORY_MOVE_COST (mode, class1, 1));
13919 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13920 MEMORY_MOVE_COST (mode, class2, 1));
13921
13922 /* In case of copying from a general purpose register we may emit multiple
13923 stores followed by a single load, causing a memory size mismatch stall.
13924 Count this as an arbitrarily high cost of 20.  */
13925 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13926 cost += 20;
13927
13928 /* In the case of FP/MMX moves, the registers actually overlap, and we
13929 have to switch modes in order to treat them differently. */
13930 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13931 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13932 cost += 20;
13933
13934 return cost;
13935 }
13936
13937 /* Moves between SSE/MMX and integer unit are expensive. */
13938 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13939 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13940 return ix86_cost->mmxsse_to_integer;
13941 if (MAYBE_FLOAT_CLASS_P (class1))
13942 return ix86_cost->fp_move;
13943 if (MAYBE_SSE_CLASS_P (class1))
13944 return ix86_cost->sse_move;
13945 if (MAYBE_MMX_CLASS_P (class1))
13946 return ix86_cost->mmx_move;
13947 return 2;
13948 }
13949
13950 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13951 int
13952 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
13953 {
13954 /* Only the flags registers can hold CCmode values, and they can hold nothing else.  */
13955 if (CC_REGNO_P (regno))
13956 return GET_MODE_CLASS (mode) == MODE_CC;
13957 if (GET_MODE_CLASS (mode) == MODE_CC
13958 || GET_MODE_CLASS (mode) == MODE_RANDOM
13959 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13960 return 0;
13961 if (FP_REGNO_P (regno))
13962 return VALID_FP_MODE_P (mode);
13963 if (SSE_REGNO_P (regno))
13964 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
13965 if (MMX_REGNO_P (regno))
13966 return (TARGET_MMX
13967 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
13968 /* We handle both integers and floats in the general purpose registers.
13969 In the future we should be able to handle vector modes as well.  */
13970 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13971 return 0;
13972 /* Take care with QImode values - they can be in non-QI regs, but then
13973 they do cause partial register stalls.  */
13974 if (regno < 4 || mode != QImode || TARGET_64BIT)
13975 return 1;
13976 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13977 }
13978
13979 /* Return the cost of moving data of mode M between a
13980 register and memory. A value of 2 is the default; this cost is
13981 relative to those in `REGISTER_MOVE_COST'.
13982
13983 If moving between registers and memory is more expensive than
13984 between two registers, you should define this macro to express the
13985 relative cost.
13986
13987 Also model the increased cost of moving QImode registers in
13988 non-Q_REGS classes.
13989 */
13990 int
13991 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
13992 {
13993 if (FLOAT_CLASS_P (class))
13994 {
13995 int index;
13996 switch (mode)
13997 {
13998 case SFmode:
13999 index = 0;
14000 break;
14001 case DFmode:
14002 index = 1;
14003 break;
14004 case XFmode:
14005 index = 2;
14006 break;
14007 default:
14008 return 100;
14009 }
14010 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14011 }
14012 if (SSE_CLASS_P (class))
14013 {
14014 int index;
14015 switch (GET_MODE_SIZE (mode))
14016 {
14017 case 4:
14018 index = 0;
14019 break;
14020 case 8:
14021 index = 1;
14022 break;
14023 case 16:
14024 index = 2;
14025 break;
14026 default:
14027 return 100;
14028 }
14029 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14030 }
14031 if (MMX_CLASS_P (class))
14032 {
14033 int index;
14034 switch (GET_MODE_SIZE (mode))
14035 {
14036 case 4:
14037 index = 0;
14038 break;
14039 case 8:
14040 index = 1;
14041 break;
14042 default:
14043 return 100;
14044 }
14045 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14046 }
14047 switch (GET_MODE_SIZE (mode))
14048 {
14049 case 1:
14050 if (in)
14051 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14052 : ix86_cost->movzbl_load);
14053 else
14054 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14055 : ix86_cost->int_store[0] + 4);
14056 break;
14057 case 2:
14058 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14059 default:
14060 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14061 if (mode == TFmode)
14062 mode = XFmode;
14063 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14064 * (((int) GET_MODE_SIZE (mode)
14065 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14066 }
14067 }
14068
14069 /* Compute a (partial) cost for rtx X. Return true if the complete
14070 cost has been computed, and false if subexpressions should be
14071 scanned. In either case, *TOTAL contains the cost result. */
14072
14073 static bool
14074 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14075 {
14076 enum machine_mode mode = GET_MODE (x);
14077
14078 switch (code)
14079 {
14080 case CONST_INT:
14081 case CONST:
14082 case LABEL_REF:
14083 case SYMBOL_REF:
14084 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14085 *total = 3;
14086 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
14087 *total = 2;
14088 else if (flag_pic && SYMBOLIC_CONST (x)
14089 && (!TARGET_64BIT
14090 || (GET_CODE (x) != LABEL_REF
14091 && (GET_CODE (x) != SYMBOL_REF
14092 || !SYMBOL_REF_LOCAL_P (x)))))
14093 *total = 1;
14094 else
14095 *total = 0;
14096 return true;
14097
14098 case CONST_DOUBLE:
14099 if (mode == VOIDmode)
14100 *total = 0;
14101 else
14102 switch (standard_80387_constant_p (x))
14103 {
14104 case 1: /* 0.0 */
14105 *total = 1;
14106 break;
14107 default: /* Other constants */
14108 *total = 2;
14109 break;
14110 case 0:
14111 case -1:
14112 /* Start with (MEM (SYMBOL_REF)), since that's where
14113 it'll probably end up. Add a penalty for size. */
14114 *total = (COSTS_N_INSNS (1)
14115 + (flag_pic != 0 && !TARGET_64BIT)
14116 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14117 break;
14118 }
14119 return true;
14120
14121 case ZERO_EXTEND:
14122 /* The zero extension is often completely free on x86_64, so make
14123 it as cheap as possible.  */
14124 if (TARGET_64BIT && mode == DImode
14125 && GET_MODE (XEXP (x, 0)) == SImode)
14126 *total = 1;
14127 else if (TARGET_ZERO_EXTEND_WITH_AND)
14128 *total = COSTS_N_INSNS (ix86_cost->add);
14129 else
14130 *total = COSTS_N_INSNS (ix86_cost->movzx);
14131 return false;
14132
14133 case SIGN_EXTEND:
14134 *total = COSTS_N_INSNS (ix86_cost->movsx);
14135 return false;
14136
14137 case ASHIFT:
14138 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14139 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14140 {
14141 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14142 if (value == 1)
14143 {
14144 *total = COSTS_N_INSNS (ix86_cost->add);
14145 return false;
14146 }
14147 if ((value == 2 || value == 3)
14148 && ix86_cost->lea <= ix86_cost->shift_const)
14149 {
14150 *total = COSTS_N_INSNS (ix86_cost->lea);
14151 return false;
14152 }
14153 }
14154 /* FALLTHRU */
14155
14156 case ROTATE:
14157 case ASHIFTRT:
14158 case LSHIFTRT:
14159 case ROTATERT:
14160 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14161 {
14162 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14163 {
14164 if (INTVAL (XEXP (x, 1)) > 32)
14165 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14166 else
14167 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14168 }
14169 else
14170 {
14171 if (GET_CODE (XEXP (x, 1)) == AND)
14172 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14173 else
14174 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14175 }
14176 }
14177 else
14178 {
14179 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14180 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14181 else
14182 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14183 }
14184 return false;
14185
14186 case MULT:
14187 if (FLOAT_MODE_P (mode))
14188 {
14189 *total = COSTS_N_INSNS (ix86_cost->fmul);
14190 return false;
14191 }
14192 else
14193 {
14194 rtx op0 = XEXP (x, 0);
14195 rtx op1 = XEXP (x, 1);
14196 int nbits;
14197 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14198 {
14199 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
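/* Clearing the lowest set bit on each iteration counts the set bits
   in the constant multiplier; more set bits mean a costlier multiply.  */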
14200 for (nbits = 0; value != 0; value &= value - 1)
14201 nbits++;
14202 }
14203 else
14204 /* This is arbitrary. */
14205 nbits = 7;
14206
14207 /* Compute costs correctly for widening multiplication. */
14208 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14209 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14210 == GET_MODE_SIZE (mode))
14211 {
14212 int is_mulwiden = 0;
14213 enum machine_mode inner_mode = GET_MODE (op0);
14214
14215 if (GET_CODE (op0) == GET_CODE (op1))
14216 is_mulwiden = 1, op1 = XEXP (op1, 0);
14217 else if (GET_CODE (op1) == CONST_INT)
14218 {
14219 if (GET_CODE (op0) == SIGN_EXTEND)
14220 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14221 == INTVAL (op1);
14222 else
14223 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14224 }
14225
14226 if (is_mulwiden)
14227 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14228 }
14229
14230 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14231 + nbits * ix86_cost->mult_bit)
14232 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14233
14234 return true;
14235 }
14236
14237 case DIV:
14238 case UDIV:
14239 case MOD:
14240 case UMOD:
14241 if (FLOAT_MODE_P (mode))
14242 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14243 else
14244 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14245 return false;
14246
14247 case PLUS:
14248 if (FLOAT_MODE_P (mode))
14249 *total = COSTS_N_INSNS (ix86_cost->fadd);
14250 else if (GET_MODE_CLASS (mode) == MODE_INT
14251 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14252 {
14253 if (GET_CODE (XEXP (x, 0)) == PLUS
14254 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14255 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14256 && CONSTANT_P (XEXP (x, 1)))
14257 {
14258 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14259 if (val == 2 || val == 4 || val == 8)
14260 {
14261 *total = COSTS_N_INSNS (ix86_cost->lea);
14262 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14263 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14264 outer_code);
14265 *total += rtx_cost (XEXP (x, 1), outer_code);
14266 return true;
14267 }
14268 }
14269 else if (GET_CODE (XEXP (x, 0)) == MULT
14270 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14271 {
14272 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14273 if (val == 2 || val == 4 || val == 8)
14274 {
14275 *total = COSTS_N_INSNS (ix86_cost->lea);
14276 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14277 *total += rtx_cost (XEXP (x, 1), outer_code);
14278 return true;
14279 }
14280 }
14281 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14282 {
14283 *total = COSTS_N_INSNS (ix86_cost->lea);
14284 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14285 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14286 *total += rtx_cost (XEXP (x, 1), outer_code);
14287 return true;
14288 }
14289 }
14290 /* FALLTHRU */
14291
14292 case MINUS:
14293 if (FLOAT_MODE_P (mode))
14294 {
14295 *total = COSTS_N_INSNS (ix86_cost->fadd);
14296 return false;
14297 }
14298 /* FALLTHRU */
14299
14300 case AND:
14301 case IOR:
14302 case XOR:
14303 if (!TARGET_64BIT && mode == DImode)
14304 {
14305 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14306 + (rtx_cost (XEXP (x, 0), outer_code)
14307 << (GET_MODE (XEXP (x, 0)) != DImode))
14308 + (rtx_cost (XEXP (x, 1), outer_code)
14309 << (GET_MODE (XEXP (x, 1)) != DImode)));
14310 return true;
14311 }
14312 /* FALLTHRU */
14313
14314 case NEG:
14315 if (FLOAT_MODE_P (mode))
14316 {
14317 *total = COSTS_N_INSNS (ix86_cost->fchs);
14318 return false;
14319 }
14320 /* FALLTHRU */
14321
14322 case NOT:
14323 if (!TARGET_64BIT && mode == DImode)
14324 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14325 else
14326 *total = COSTS_N_INSNS (ix86_cost->add);
14327 return false;
14328
14329 case COMPARE:
14330 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
14331 && XEXP (XEXP (x, 0), 1) == const1_rtx
14332 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
14333 && XEXP (x, 1) == const0_rtx)
14334 {
14335 /* This kind of construct is implemented using test[bwl].
14336 Treat it as if we had an AND. */
14337 *total = (COSTS_N_INSNS (ix86_cost->add)
14338 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
14339 + rtx_cost (const1_rtx, outer_code));
14340 return true;
14341 }
14342 return false;
14343
14344 case FLOAT_EXTEND:
14345 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14346 *total = 0;
14347 return false;
14348
14349 case ABS:
14350 if (FLOAT_MODE_P (mode))
14351 *total = COSTS_N_INSNS (ix86_cost->fabs);
14352 return false;
14353
14354 case SQRT:
14355 if (FLOAT_MODE_P (mode))
14356 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14357 return false;
14358
14359 case UNSPEC:
14360 if (XINT (x, 1) == UNSPEC_TP)
14361 *total = 0;
14362 return false;
14363
14364 default:
14365 return false;
14366 }
14367 }
14368
14369 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14370 static void
14371 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14372 {
14373 init_section ();
14374 fputs ("\tpushl $", asm_out_file);
14375 assemble_name (asm_out_file, XSTR (symbol, 0));
14376 fputc ('\n', asm_out_file);
14377 }
14378 #endif
14379
14380 #if TARGET_MACHO
14381
14382 static int current_machopic_label_num;
14383
14384 /* Given a symbol name and its associated stub, write out the
14385 definition of the stub. */
14386
14387 void
14388 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14389 {
14390 unsigned int length;
14391 char *binder_name, *symbol_name, lazy_ptr_name[32];
14392 int label = ++current_machopic_label_num;
14393
14394 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14395 symb = (*targetm.strip_name_encoding) (symb);
14396
14397 length = strlen (stub);
14398 binder_name = alloca (length + 32);
14399 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14400
14401 length = strlen (symb);
14402 symbol_name = alloca (length + 32);
14403 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14404
14405 sprintf (lazy_ptr_name, "L%d$lz", label);
14406
14407 if (MACHOPIC_PURE)
14408 machopic_picsymbol_stub_section ();
14409 else
14410 machopic_symbol_stub_section ();
14411
14412 fprintf (file, "%s:\n", stub);
14413 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14414
14415 if (MACHOPIC_PURE)
14416 {
14417 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14418 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14419 fprintf (file, "\tjmp %%edx\n");
14420 }
14421 else
14422 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14423
14424 fprintf (file, "%s:\n", binder_name);
14425
14426 if (MACHOPIC_PURE)
14427 {
14428 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14429 fprintf (file, "\tpushl %%eax\n");
14430 }
14431 else
14432 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14433
14434 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14435
14436 machopic_lazy_symbol_ptr_section ();
14437 fprintf (file, "%s:\n", lazy_ptr_name);
14438 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14439 fprintf (file, "\t.long %s\n", binder_name);
14440 }
14441 #endif /* TARGET_MACHO */
14442
14443 /* Order the registers for the register allocator.  */
14444
14445 void
14446 x86_order_regs_for_local_alloc (void)
14447 {
14448 int pos = 0;
14449 int i;
14450
14451 /* First allocate the local general purpose registers. */
14452 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14453 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14454 reg_alloc_order [pos++] = i;
14455
14456 /* Global general purpose registers. */
14457 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14458 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14459 reg_alloc_order [pos++] = i;
14460
14461 /* x87 registers come first in case we are doing FP math
14462 using them. */
14463 if (!TARGET_SSE_MATH)
14464 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14465 reg_alloc_order [pos++] = i;
14466
14467 /* SSE registers. */
14468 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14469 reg_alloc_order [pos++] = i;
14470 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14471 reg_alloc_order [pos++] = i;
14472
14473 /* x87 registers. */
14474 if (TARGET_SSE_MATH)
14475 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14476 reg_alloc_order [pos++] = i;
14477
14478 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14479 reg_alloc_order [pos++] = i;
14480
14481 /* Initialize the rest of the array, as we do not allocate some registers
14482 at all.  */
14483 while (pos < FIRST_PSEUDO_REGISTER)
14484 reg_alloc_order [pos++] = 0;
14485 }
14486
14487 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14488 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14489 #endif
14490
14491 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14492 struct attribute_spec.handler. */
14493 static tree
14494 ix86_handle_struct_attribute (tree *node, tree name,
14495 tree args ATTRIBUTE_UNUSED,
14496 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14497 {
14498 tree *type = NULL;
14499 if (DECL_P (*node))
14500 {
14501 if (TREE_CODE (*node) == TYPE_DECL)
14502 type = &TREE_TYPE (*node);
14503 }
14504 else
14505 type = node;
14506
14507 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14508 || TREE_CODE (*type) == UNION_TYPE)))
14509 {
14510 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
14511 *no_add_attrs = true;
14512 }
14513
14514 else if ((is_attribute_p ("ms_struct", name)
14515 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14516 || ((is_attribute_p ("gcc_struct", name)
14517 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14518 {
14519 warning ("%qs incompatible attribute ignored",
14520 IDENTIFIER_POINTER (name));
14521 *no_add_attrs = true;
14522 }
14523
14524 return NULL_TREE;
14525 }
14526
14527 static bool
14528 ix86_ms_bitfield_layout_p (tree record_type)
14529 {
14530 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14531 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14532 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14533 }
14534
14535 /* Returns an expression indicating where the this parameter is
14536 located on entry to the FUNCTION. */
14537
14538 static rtx
14539 x86_this_parameter (tree function)
14540 {
14541 tree type = TREE_TYPE (function);
14542
14543 if (TARGET_64BIT)
14544 {
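/* When the return value is passed in memory, the hidden return pointer
   takes the first integer register, pushing `this' to the second one.  */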
14545 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14546 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14547 }
14548
14549 if (ix86_function_regparm (type, function) > 0)
14550 {
14551 tree parm;
14552
14553 parm = TYPE_ARG_TYPES (type);
14554 /* Figure out whether or not the function has a variable number of
14555 arguments. */
14556 for (; parm; parm = TREE_CHAIN (parm))
14557 if (TREE_VALUE (parm) == void_type_node)
14558 break;
14559 /* If not, the this parameter is in the first argument. */
14560 if (parm)
14561 {
14562 int regno = 0;
14563 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14564 regno = 2;
14565 return gen_rtx_REG (SImode, regno);
14566 }
14567 }
14568
14569 if (aggregate_value_p (TREE_TYPE (type), type))
14570 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14571 else
14572 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14573 }
14574
14575 /* Determine whether x86_output_mi_thunk can succeed. */
14576
14577 static bool
14578 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14579 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14580 HOST_WIDE_INT vcall_offset, tree function)
14581 {
14582 /* 64-bit can handle anything. */
14583 if (TARGET_64BIT)
14584 return true;
14585
14586 /* For 32-bit, everything's fine if we have one free register. */
14587 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14588 return true;
14589
14590 /* Need a free register for vcall_offset. */
14591 if (vcall_offset)
14592 return false;
14593
14594 /* Need a free register for GOT references. */
14595 if (flag_pic && !(*targetm.binds_local_p) (function))
14596 return false;
14597
14598 /* Otherwise ok. */
14599 return true;
14600 }
14601
14602 /* Output the assembler code for a thunk function. THUNK_DECL is the
14603 declaration for the thunk function itself, FUNCTION is the decl for
14604 the target function. DELTA is an immediate constant offset to be
14605 added to THIS. If VCALL_OFFSET is nonzero, the word at
14606 *(*this + vcall_offset) should be added to THIS. */
14607
14608 static void
14609 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14610 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14611 HOST_WIDE_INT vcall_offset, tree function)
14612 {
14613 rtx xops[3];
14614 rtx this = x86_this_parameter (function);
14615 rtx this_reg, tmp;
14616
14617 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14618 pull it in now and let DELTA benefit. */
14619 if (REG_P (this))
14620 this_reg = this;
14621 else if (vcall_offset)
14622 {
14623 /* Put the this parameter into %eax. */
14624 xops[0] = this;
14625 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14626 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14627 }
14628 else
14629 this_reg = NULL_RTX;
14630
14631 /* Adjust the this parameter by a fixed constant. */
14632 if (delta)
14633 {
14634 xops[0] = GEN_INT (delta);
14635 xops[1] = this_reg ? this_reg : this;
14636 if (TARGET_64BIT)
14637 {
14638 if (!x86_64_general_operand (xops[0], DImode))
14639 {
14640 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14641 xops[1] = tmp;
14642 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14643 xops[0] = tmp;
14644 xops[1] = this;
14645 }
14646 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14647 }
14648 else
14649 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14650 }
14651
14652 /* Adjust the this parameter by a value stored in the vtable. */
14653 if (vcall_offset)
14654 {
14655 if (TARGET_64BIT)
14656 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14657 else
14658 {
14659 int tmp_regno = 2 /* ECX */;
14660 if (lookup_attribute ("fastcall",
14661 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14662 tmp_regno = 0 /* EAX */;
14663 tmp = gen_rtx_REG (SImode, tmp_regno);
14664 }
14665
14666 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14667 xops[1] = tmp;
14668 if (TARGET_64BIT)
14669 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14670 else
14671 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14672
14673 /* Adjust the this parameter. */
14674 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14675 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14676 {
14677 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14678 xops[0] = GEN_INT (vcall_offset);
14679 xops[1] = tmp2;
14680 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14681 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14682 }
14683 xops[1] = this_reg;
14684 if (TARGET_64BIT)
14685 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14686 else
14687 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14688 }
14689
14690 /* If necessary, drop THIS back to its stack slot. */
14691 if (this_reg && this_reg != this)
14692 {
14693 xops[0] = this_reg;
14694 xops[1] = this;
14695 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14696 }
14697
14698 xops[0] = XEXP (DECL_RTL (function), 0);
14699 if (TARGET_64BIT)
14700 {
14701 if (!flag_pic || (*targetm.binds_local_p) (function))
14702 output_asm_insn ("jmp\t%P0", xops);
14703 else
14704 {
14705 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14706 tmp = gen_rtx_CONST (Pmode, tmp);
14707 tmp = gen_rtx_MEM (QImode, tmp);
14708 xops[0] = tmp;
14709 output_asm_insn ("jmp\t%A0", xops);
14710 }
14711 }
14712 else
14713 {
14714 if (!flag_pic || (*targetm.binds_local_p) (function))
14715 output_asm_insn ("jmp\t%P0", xops);
14716 else
14717 #if TARGET_MACHO
14718 if (TARGET_MACHO)
14719 {
14720 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14721 tmp = (gen_rtx_SYMBOL_REF
14722 (Pmode,
14723 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14724 tmp = gen_rtx_MEM (QImode, tmp);
14725 xops[0] = tmp;
14726 output_asm_insn ("jmp\t%0", xops);
14727 }
14728 else
14729 #endif /* TARGET_MACHO */
14730 {
14731 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14732 output_set_got (tmp);
14733
14734 xops[1] = tmp;
14735 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14736 output_asm_insn ("jmp\t{*}%1", xops);
14737 }
14738 }
14739 }
14740
14741 static void
14742 x86_file_start (void)
14743 {
14744 default_file_start ();
14745 if (X86_FILE_START_VERSION_DIRECTIVE)
14746 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14747 if (X86_FILE_START_FLTUSED)
14748 fputs ("\t.global\t__fltused\n", asm_out_file);
14749 if (ix86_asm_dialect == ASM_INTEL)
14750 fputs ("\t.intel_syntax\n", asm_out_file);
14751 }
14752
14753 int
14754 x86_field_alignment (tree field, int computed)
14755 {
14756 enum machine_mode mode;
14757 tree type = TREE_TYPE (field);
14758
14759 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14760 return computed;
14761 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14762 ? get_inner_array_type (type) : type);
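/* Without -malign-double the traditional ia32 ABI aligns double and
   64-bit integer fields to only 32 bits.  */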
14763 if (mode == DFmode || mode == DCmode
14764 || GET_MODE_CLASS (mode) == MODE_INT
14765 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14766 return MIN (32, computed);
14767 return computed;
14768 }
14769
14770 /* Output assembler code to FILE to increment profiler label # LABELNO
14771 for profiling a function entry. */
14772 void
14773 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
14774 {
14775 if (TARGET_64BIT)
14776 if (flag_pic)
14777 {
14778 #ifndef NO_PROFILE_COUNTERS
14779 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14780 #endif
14781 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14782 }
14783 else
14784 {
14785 #ifndef NO_PROFILE_COUNTERS
14786 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14787 #endif
14788 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14789 }
14790 else if (flag_pic)
14791 {
14792 #ifndef NO_PROFILE_COUNTERS
14793 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14794 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14795 #endif
14796 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14797 }
14798 else
14799 {
14800 #ifndef NO_PROFILE_COUNTERS
14801 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14802 PROFILE_COUNT_REGISTER);
14803 #endif
14804 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14805 }
14806 }
14807
14808 /* We don't have exact information about the insn sizes, but we may assume
14809 quite safely that we are informed about all 1 byte insns and memory
14810 address sizes. This is enough to eliminate unnecessary padding in
14811 99% of cases. */
14812
14813 static int
14814 min_insn_size (rtx insn)
14815 {
14816 int l = 0;
14817
14818 if (!INSN_P (insn) || !active_insn_p (insn))
14819 return 0;
14820
14821 /* Discard the alignments we've emitted, and jump tables.  */
14822 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14823 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
14824 return 0;
14825 if (GET_CODE (insn) == JUMP_INSN
14826 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14827 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14828 return 0;
14829
14830 /* Important case - calls are always 5 bytes.
14831 It is common to have many calls in a row.  */
14832 if (GET_CODE (insn) == CALL_INSN
14833 && symbolic_reference_mentioned_p (PATTERN (insn))
14834 && !SIBLING_CALL_P (insn))
14835 return 5;
14836 if (get_attr_length (insn) <= 1)
14837 return 1;
14838
14839 /* For normal instructions we may rely on the sizes of addresses
14840 and the presence of a symbol to require 4 bytes of encoding.
14841 This is not the case for jumps, where references are PC relative.  */
14842 if (GET_CODE (insn) != JUMP_INSN)
14843 {
14844 l = get_attr_length_address (insn);
14845 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14846 l = 4;
14847 }
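/* One opcode byte plus L address bytes; otherwise assume a 2 byte insn.  */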
14848 if (l)
14849 return 1+l;
14850 else
14851 return 2;
14852 }
14853
14854 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
14855 16 byte window.  */
14856
14857 static void
14858 ix86_avoid_jump_misspredicts (void)
14859 {
14860 rtx insn, start = get_insns ();
14861 int nbytes = 0, njumps = 0;
14862 int isjump = 0;
14863
14864 /* Look for all minimal intervals of instructions containing 4 jumps.
14865 The intervals are bounded by START and INSN. NBYTES is the total
14866 size of instructions in the interval including INSN and not including
14867 START.  When NBYTES is smaller than 16 bytes, it is possible
14868 that the ends of START and INSN land in the same 16 byte page.
14869
14870 The smallest offset in the page at which INSN can start is the case where
14871 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
14872 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
14873 */
14874 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14875 {
14876
14877 nbytes += min_insn_size (insn);
14878 if (dump_file)
14879 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
14880 INSN_UID (insn), min_insn_size (insn));
14881 if ((GET_CODE (insn) == JUMP_INSN
14882 && GET_CODE (PATTERN (insn)) != ADDR_VEC
14883 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
14884 || GET_CODE (insn) == CALL_INSN)
14885 njumps++;
14886 else
14887 continue;
14888
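/* More than three jumps now fit in the window; slide START forward,
   keeping NBYTES and NJUMPS in sync, until at most three remain.  */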
14889 while (njumps > 3)
14890 {
14891 start = NEXT_INSN (start);
14892 if ((GET_CODE (start) == JUMP_INSN
14893 && GET_CODE (PATTERN (start)) != ADDR_VEC
14894 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
14895 || GET_CODE (start) == CALL_INSN)
14896 njumps--, isjump = 1;
14897 else
14898 isjump = 0;
14899 nbytes -= min_insn_size (start);
14900 }
14901 if (njumps < 0)
14902 abort ();
14903 if (dump_file)
14904 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
14905 INSN_UID (start), INSN_UID (insn), nbytes);
14906
14907 if (njumps == 3 && isjump && nbytes < 16)
14908 {
14909 int padsize = 15 - nbytes + min_insn_size (insn);
14910
14911 if (dump_file)
14912 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
14913 INSN_UID (insn), padsize);
14914 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
14915 }
14916 }
14917 }
14918
14919 /* The AMD Athlon works faster
14920 when RET is not the destination of a conditional jump or directly preceded
14921 by another jump instruction.  We avoid the penalty by inserting a NOP just
14922 before such RET instructions.  */
14923 static void
14924 ix86_pad_returns (void)
14925 {
14926 edge e;
14927 edge_iterator ei;
14928
14929 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
14930 {
14931 basic_block bb = e->src;
14932 rtx ret = BB_END (bb);
14933 rtx prev;
14934 bool replace = false;
14935
14936 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
14937 || !maybe_hot_bb_p (bb))
14938 continue;
14939 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14940 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14941 break;
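/* If the RET follows a label, it is a branch target whenever some
   predecessor edge reaches it other than by falling through.  */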
14942 if (prev && GET_CODE (prev) == CODE_LABEL)
14943 {
14944 edge e;
14945 edge_iterator ei;
14946
14947 FOR_EACH_EDGE (e, ei, bb->preds)
14948 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14949 && !(e->flags & EDGE_FALLTHRU))
14950 replace = true;
14951 }
14952 if (!replace)
14953 {
14954 prev = prev_active_insn (ret);
14955 if (prev
14956 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
14957 || GET_CODE (prev) == CALL_INSN))
14958 replace = true;
14959 /* Empty functions get a branch mispredict even when the jump destination
14960 is not visible to us.  */
14961 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14962 replace = true;
14963 }
14964 if (replace)
14965 {
14966 emit_insn_before (gen_return_internal_long (), ret);
14967 delete_insn (ret);
14968 }
14969 }
14970 }
14971
14972 /* Implement machine specific optimizations.  We implement padding of returns
14973 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
14974 static void
14975 ix86_reorg (void)
14976 {
14977 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
14978 ix86_pad_returns ();
14979 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
14980 ix86_avoid_jump_misspredicts ();
14981 }
14982
14983 /* Return nonzero when a QImode register that must be represented via a REX
14984 prefix is used.  */
14985 bool
14986 x86_extended_QIreg_mentioned_p (rtx insn)
14987 {
14988 int i;
14989 extract_insn_cached (insn);
14990 for (i = 0; i < recog_data.n_operands; i++)
14991 if (REG_P (recog_data.operand[i])
14992 && REGNO (recog_data.operand[i]) >= 4)
14993 return true;
14994 return false;
14995 }
14996
14997 /* Return nonzero when P points to a register encoded via a REX prefix.
14998 Called via for_each_rtx.  */
14999 static int
15000 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15001 {
15002 unsigned int regno;
15003 if (!REG_P (*p))
15004 return 0;
15005 regno = REGNO (*p);
15006 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15007 }
15008
15009 /* Return true when INSN mentions a register that must be encoded using a
15010 REX prefix.  */
15011 bool
15012 x86_extended_reg_mentioned_p (rtx insn)
15013 {
15014 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15015 }
15016
15017 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15018 optabs would emit if we didn't have TFmode patterns. */
15019
15020 void
15021 x86_emit_floatuns (rtx operands[2])
15022 {
15023 rtx neglab, donelab, i0, i1, f0, in, out;
15024 enum machine_mode mode, inmode;
15025
15026 inmode = GET_MODE (operands[1]);
15027 if (inmode != SImode
15028 && inmode != DImode)
15029 abort ();
15030
15031 out = operands[0];
15032 in = force_reg (inmode, operands[1]);
15033 mode = GET_MODE (out);
15034 neglab = gen_label_rtx ();
15035 donelab = gen_label_rtx ();
15036 i1 = gen_reg_rtx (Pmode);
15037 f0 = gen_reg_rtx (mode);
15038
15039 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15040
15041 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15042 emit_jump_insn (gen_jump (donelab));
15043 emit_barrier ();
15044
15045 emit_label (neglab);
15046
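/* The value has the sign bit set.  Halve it, OR the shifted-out bit back
   in so rounding is unaffected, convert, and double the result.  */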
15047 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15048 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15049 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15050 expand_float (f0, i0, 0);
15051 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15052
15053 emit_label (donelab);
15054 }
15055
15056 /* Initialize vector TARGET via VALS. */
15057 void
15058 ix86_expand_vector_init (rtx target, rtx vals)
15059 {
15060 enum machine_mode mode = GET_MODE (target);
15061 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15062 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15063 int i;
15064
15065 for (i = n_elts - 1; i >= 0; i--)
15066 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15067 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15068 break;
15069
15070 /* A few special cases first...
15071 ... constants are best loaded from the constant pool.  */
15072 if (i < 0)
15073 {
15074 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15075 return;
15076 }
15077
15078 /* ... values where only the first field is non-constant are best loaded
15079 from the pool and overwritten via a move later.  */
15080 if (!i)
15081 {
15082 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15083 GET_MODE_INNER (mode), 0);
15084
15085 op = force_reg (mode, op);
15086 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15087 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15088 switch (GET_MODE (target))
15089 {
15090 case V2DFmode:
15091 emit_insn (gen_sse2_movsd (target, target, op));
15092 break;
15093 case V4SFmode:
15094 emit_insn (gen_sse_movss (target, target, op));
15095 break;
15096 default:
15097 break;
15098 }
15099 return;
15100 }
15101
15102 /* And the general case, built up with interleave (unpack) instructions.  */
15103 switch (GET_MODE (target))
15104 {
15105 case V2DFmode:
15106 {
15107 rtx vecop0 =
15108 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15109 rtx vecop1 =
15110 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15111
15112 vecop0 = force_reg (V2DFmode, vecop0);
15113 vecop1 = force_reg (V2DFmode, vecop1);
15114 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15115 }
15116 break;
15117 case V4SFmode:
15118 {
15119 rtx vecop0 =
15120 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15121 rtx vecop1 =
15122 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15123 rtx vecop2 =
15124 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15125 rtx vecop3 =
15126 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15127 rtx tmp1 = gen_reg_rtx (V4SFmode);
15128 rtx tmp2 = gen_reg_rtx (V4SFmode);
15129
15130 vecop0 = force_reg (V4SFmode, vecop0);
15131 vecop1 = force_reg (V4SFmode, vecop1);
15132 vecop2 = force_reg (V4SFmode, vecop2);
15133 vecop3 = force_reg (V4SFmode, vecop3);
15134 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15135 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15136 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15137 }
15138 break;
15139 default:
15140 abort ();
15141 }
15142 }
15143
15144 /* Implements target hook vector_mode_supported_p. */
15145 static bool
15146 ix86_vector_mode_supported_p (enum machine_mode mode)
15147 {
15148 if (TARGET_SSE
15149 && VALID_SSE_REG_MODE (mode))
15150 return true;
15151
15152 else if (TARGET_MMX
15153 && VALID_MMX_REG_MODE (mode))
15154 return true;
15155
15156 else if (TARGET_3DNOW
15157 && VALID_MMX_REG_MODE_3DNOW (mode))
15158 return true;
15159
15160 else
15161 return false;
15162 }
15163
15164 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15165
15166 We do this in the new i386 backend to maintain source compatibility
15167 with the old cc0-based compiler. */
15168
15169 static tree
15170 ix86_md_asm_clobbers (tree clobbers)
15171 {
15172 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15173 clobbers);
15174 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15175 clobbers);
15176 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15177 clobbers);
15178 return clobbers;
15179 }
15180
15181 /* Worker function for REVERSE_CONDITION. */
15182
15183 enum rtx_code
15184 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15185 {
15186 return (mode != CCFPmode && mode != CCFPUmode
15187 ? reverse_condition (code)
15188 : reverse_condition_maybe_unordered (code));
15189 }
15190
15191 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15192 to OPERANDS[0]. */
15193
15194 const char *
15195 output_387_reg_move (rtx insn, rtx *operands)
15196 {
15197 if (REG_P (operands[1])
15198 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15199 {
15200 if (REGNO (operands[0]) == FIRST_STACK_REG
15201 && TARGET_USE_FFREEP)
15202 return "ffreep\t%y0";
15203 return "fstp\t%y0";
15204 }
15205 if (STACK_TOP_P (operands[0]))
15206 return "fld%z1\t%y1";
15207 return "fst\t%y0";
15208 }
15209
15210 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15211 FP status register is set. */
15212
15213 void
15214 ix86_emit_fp_unordered_jump (rtx label)
15215 {
15216 rtx reg = gen_reg_rtx (HImode);
15217 rtx temp;
15218
15219 emit_insn (gen_x86_fnstsw_1 (reg));
15220
15221 if (TARGET_USE_SAHF)
15222 {
15223 emit_insn (gen_x86_sahf_1 (reg));
15224
15225 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15226 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15227 }
15228 else
15229 {
15230 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15231
15232 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15233 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15234 }
15235
15236 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15237 gen_rtx_LABEL_REF (VOIDmode, label),
15238 pc_rtx);
15239 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15240 emit_jump_insn (temp);
15241 }
15242
15243 /* Output code to perform a log1p XFmode calculation. */
15244
15245 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15246 {
15247 rtx label1 = gen_label_rtx ();
15248 rtx label2 = gen_label_rtx ();
15249
15250 rtx tmp = gen_reg_rtx (XFmode);
15251 rtx tmp2 = gen_reg_rtx (XFmode);
15252
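/* fyl2xp1 is only valid for |op1| < 1 - sqrt(2)/2 (about 0.29289); for
   larger magnitudes fall back to fyl2x on 1 + op1.  */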
15253 emit_insn (gen_absxf2 (tmp, op1));
15254 emit_insn (gen_cmpxf (tmp,
15255 CONST_DOUBLE_FROM_REAL_VALUE (
15256 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15257 XFmode)));
15258 emit_jump_insn (gen_bge (label1));
15259
15260 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15261 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15262 emit_jump (label2);
15263
15264 emit_label (label1);
15265 emit_move_insn (tmp, CONST1_RTX (XFmode));
15266 emit_insn (gen_addxf3 (tmp, op1, tmp));
15267 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15268 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15269
15270 emit_label (label2);
15271 }
15272
15273 #include "gt-i386.h"